Clean up some minor white space issues in trans-decl.c and trans-expr.c
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blobe53cc5683b4e3bb50ce77b8887311975752e554b
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static long *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, long> *operand_rank;
215 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
216 all basic blocks the CFG should be adjusted - basic blocks
217 split right after that SSA_NAME's definition statement and before
218 the only use, which must be a bit ior. */
219 static vec<tree> reassoc_branch_fixups;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple *stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple *end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple *stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple *use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple *phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 long *slot = operand_rank->get (e);
366 return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 gcc_assert (rank > 0);
375 gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
380 static long
381 get_rank (tree e)
383 /* SSA_NAME's have the rank of the expression they are the result
385 For globals and uninitialized values, the rank is 0.
386 For function arguments, use the pre-setup rank.
387 For PHI nodes, stores, asm statements, etc, we use the rank of
388 the BB.
389 For simple operations, the rank is the maximum rank of any of
390 its operands, or the bb_rank, whichever is less.
391 I make no claims that this is optimal, however, it gives good
392 results. */
394 /* We make an exception to the normal ranking system to break
395 dependences of accumulator variables in loops. Suppose we
396 have a simple one-block loop containing:
398 x_1 = phi(x_0, x_2)
399 b = a + x_1
400 c = b + d
401 x_2 = c + e
403 As shown, each iteration of the calculation into x is fully
404 dependent upon the iteration before it. We would prefer to
405 see this in the form:
407 x_1 = phi(x_0, x_2)
408 b = a + d
409 c = b + e
410 x_2 = c + x_1
412 If the loop is unrolled, the calculations of b and c from
413 different iterations can be interleaved.
415 To obtain this result during reassociation, we bias the rank
416 of the phi definition x_1 upward, when it is recognized as an
417 accumulator pattern. The artificial rank causes it to be
418 added last, providing the desired independence. */
420 if (TREE_CODE (e) == SSA_NAME)
422 ssa_op_iter iter;
423 gimple *stmt;
424 long rank;
425 tree op;
427 if (SSA_NAME_IS_DEFAULT_DEF (e))
428 return find_operand_rank (e);
430 stmt = SSA_NAME_DEF_STMT (e);
431 if (gimple_code (stmt) == GIMPLE_PHI)
432 return phi_rank (stmt);
434 if (!is_gimple_assign (stmt))
435 return bb_rank[gimple_bb (stmt)->index];
437 /* If we already have a rank for this expression, use that. */
438 rank = find_operand_rank (e);
439 if (rank != -1)
440 return rank;
442 /* Otherwise, find the maximum rank for the operands. As an
443 exception, remove the bias from loop-carried phis when propagating
444 the rank so that dependent operations are not also biased. */
445 /* Simply walk over all SSA uses - this takes advatage of the
446 fact that non-SSA operands are is_gimple_min_invariant and
447 thus have rank 0. */
448 rank = 0;
449 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
450 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Constants, globals, etc., are rank 0 */
465 return 0;
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry *oea = *(const operand_entry *const *)pa;
494 const operand_entry *oeb = *(const operand_entry *const *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied//whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible, prefer to sort
517 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
558 operand_entry *oe = operand_entry_pool.allocate ();
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry *oe = operand_entry_pool.allocate ();
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
609 operand of the negate operation. Otherwise, return NULL. */
611 static tree
612 get_unary_op (tree name, enum tree_code opcode)
614 gimple *stmt = SSA_NAME_DEF_STMT (name);
616 if (!is_gimple_assign (stmt))
617 return NULL_TREE;
619 if (gimple_assign_rhs_code (stmt) == opcode)
620 return gimple_assign_rhs1 (stmt);
621 return NULL_TREE;
624 /* If CURR and LAST are a pair of ops that OPCODE allows us to
625 eliminate through equivalences, do so, remove them from OPS, and
626 return true. Otherwise, return false. */
628 static bool
629 eliminate_duplicate_pair (enum tree_code opcode,
630 vec<operand_entry *> *ops,
631 bool *all_done,
632 unsigned int i,
633 operand_entry *curr,
634 operand_entry *last)
637 /* If we have two of the same op, and the opcode is & |, min, or max,
638 we can eliminate one of them.
639 If we have two of the same op, and the opcode is ^, we can
640 eliminate both of them. */
642 if (last && last->op == curr->op)
644 switch (opcode)
646 case MAX_EXPR:
647 case MIN_EXPR:
648 case BIT_IOR_EXPR:
649 case BIT_AND_EXPR:
650 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "Equivalence: ");
653 print_generic_expr (dump_file, curr->op, 0);
654 fprintf (dump_file, " [&|minmax] ");
655 print_generic_expr (dump_file, last->op, 0);
656 fprintf (dump_file, " -> ");
657 print_generic_stmt (dump_file, last->op, 0);
660 ops->ordered_remove (i);
661 reassociate_stats.ops_eliminated ++;
663 return true;
665 case BIT_XOR_EXPR:
666 if (dump_file && (dump_flags & TDF_DETAILS))
668 fprintf (dump_file, "Equivalence: ");
669 print_generic_expr (dump_file, curr->op, 0);
670 fprintf (dump_file, " ^ ");
671 print_generic_expr (dump_file, last->op, 0);
672 fprintf (dump_file, " -> nothing\n");
675 reassociate_stats.ops_eliminated += 2;
677 if (ops->length () == 2)
679 ops->create (0);
680 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
681 *all_done = true;
683 else
685 ops->ordered_remove (i-1);
686 ops->ordered_remove (i-1);
689 return true;
691 default:
692 break;
695 return false;
698 static vec<tree> plus_negates;
700 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
701 expression, look in OPS for a corresponding positive operation to cancel
702 it out. If we find one, remove the other from OPS, replace
703 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
704 return false. */
706 static bool
707 eliminate_plus_minus_pair (enum tree_code opcode,
708 vec<operand_entry *> *ops,
709 unsigned int currindex,
710 operand_entry *curr)
712 tree negateop;
713 tree notop;
714 unsigned int i;
715 operand_entry *oe;
717 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
718 return false;
720 negateop = get_unary_op (curr->op, NEGATE_EXPR);
721 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
722 if (negateop == NULL_TREE && notop == NULL_TREE)
723 return false;
725 /* Any non-negated version will have a rank that is one less than
726 the current rank. So once we hit those ranks, if we don't find
727 one, we can stop. */
729 for (i = currindex + 1;
730 ops->iterate (i, &oe)
731 && oe->rank >= curr->rank - 1 ;
732 i++)
734 if (oe->op == negateop)
737 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "Equivalence: ");
740 print_generic_expr (dump_file, negateop, 0);
741 fprintf (dump_file, " + -");
742 print_generic_expr (dump_file, oe->op, 0);
743 fprintf (dump_file, " -> 0\n");
746 ops->ordered_remove (i);
747 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
748 ops->ordered_remove (currindex);
749 reassociate_stats.ops_eliminated ++;
751 return true;
753 else if (oe->op == notop)
755 tree op_type = TREE_TYPE (oe->op);
757 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "Equivalence: ");
760 print_generic_expr (dump_file, notop, 0);
761 fprintf (dump_file, " + ~");
762 print_generic_expr (dump_file, oe->op, 0);
763 fprintf (dump_file, " -> -1\n");
766 ops->ordered_remove (i);
767 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
768 ops->ordered_remove (currindex);
769 reassociate_stats.ops_eliminated ++;
771 return true;
775 /* CURR->OP is a negate expr in a plus expr: save it for later
776 inspection in repropagate_negates(). */
777 if (negateop != NULL_TREE)
778 plus_negates.safe_push (curr->op);
780 return false;
783 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
784 bitwise not expression, look in OPS for a corresponding operand to
785 cancel it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0, and return true. Otherwise, return
787 false. */
789 static bool
790 eliminate_not_pairs (enum tree_code opcode,
791 vec<operand_entry *> *ops,
792 unsigned int currindex,
793 operand_entry *curr)
795 tree notop;
796 unsigned int i;
797 operand_entry *oe;
799 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
800 || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
803 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
804 if (notop == NULL_TREE)
805 return false;
807 /* Any non-not version will have a rank that is one less than
808 the current rank. So once we hit those ranks, if we don't find
809 one, we can stop. */
811 for (i = currindex + 1;
812 ops->iterate (i, &oe)
813 && oe->rank >= curr->rank - 1;
814 i++)
816 if (oe->op == notop)
818 if (dump_file && (dump_flags & TDF_DETAILS))
820 fprintf (dump_file, "Equivalence: ");
821 print_generic_expr (dump_file, notop, 0);
822 if (opcode == BIT_AND_EXPR)
823 fprintf (dump_file, " & ~");
824 else if (opcode == BIT_IOR_EXPR)
825 fprintf (dump_file, " | ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 if (opcode == BIT_AND_EXPR)
828 fprintf (dump_file, " -> 0\n");
829 else if (opcode == BIT_IOR_EXPR)
830 fprintf (dump_file, " -> -1\n");
833 if (opcode == BIT_AND_EXPR)
834 oe->op = build_zero_cst (TREE_TYPE (oe->op));
835 else if (opcode == BIT_IOR_EXPR)
836 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
838 reassociate_stats.ops_eliminated += ops->length () - 1;
839 ops->truncate (0);
840 ops->quick_push (oe);
841 return true;
845 return false;
848 /* Use constant value that may be present in OPS to try to eliminate
849 operands. Note that this function is only really used when we've
850 eliminated ops for other reasons, or merged constants. Across
851 single statements, fold already does all of this, plus more. There
852 is little point in duplicating logic, so I've only included the
853 identities that I could ever construct testcases to trigger. */
855 static void
856 eliminate_using_constants (enum tree_code opcode,
857 vec<operand_entry *> *ops)
859 operand_entry *oelast = ops->last ();
860 tree type = TREE_TYPE (oelast->op);
862 if (oelast->rank == 0
863 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
865 switch (opcode)
867 case BIT_AND_EXPR:
868 if (integer_zerop (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found & 0, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_all_onesp (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found & -1, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case BIT_IOR_EXPR:
894 if (integer_all_onesp (oelast->op))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found | -1, removing all other ops\n");
901 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oelast);
905 return;
908 else if (integer_zerop (oelast->op))
910 if (ops->length () != 1)
912 if (dump_file && (dump_flags & TDF_DETAILS))
913 fprintf (dump_file, "Found | 0, removing\n");
914 ops->pop ();
915 reassociate_stats.ops_eliminated++;
918 break;
919 case MULT_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && !HONOR_NANS (type)
923 && !HONOR_SIGNED_ZEROS (type)
924 && real_zerop (oelast->op)))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found * 0, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
932 ops->truncate (1);
933 ops->quick_push (oelast);
934 return;
937 else if (integer_onep (oelast->op)
938 || (FLOAT_TYPE_P (type)
939 && !HONOR_SNANS (type)
940 && real_onep (oelast->op)))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found * 1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
948 return;
951 break;
952 case BIT_XOR_EXPR:
953 case PLUS_EXPR:
954 case MINUS_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
958 && fold_real_zero_addition_p (type, oelast->op,
959 opcode == MINUS_EXPR)))
961 if (ops->length () != 1)
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found [|^+] 0, removing\n");
965 ops->pop ();
966 reassociate_stats.ops_eliminated++;
967 return;
970 break;
971 default:
972 break;
978 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
979 bool, bool);
981 /* Structure for tracking and counting operands. */
982 struct oecount {
983 int cnt;
984 int id;
985 enum tree_code oecode;
986 tree op;
990 /* The heap for the oecount hashtable and the sorted list of operands. */
991 static vec<oecount> cvec;
994 /* Oecount hashtable helpers. */
996 struct oecount_hasher : int_hash <int, 0, 1>
998 static inline hashval_t hash (int);
999 static inline bool equal (int, int);
1002 /* Hash function for oecount. */
1004 inline hashval_t
1005 oecount_hasher::hash (int p)
1007 const oecount *c = &cvec[p - 42];
1008 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1011 /* Comparison function for oecount. */
1013 inline bool
1014 oecount_hasher::equal (int p1, int p2)
1016 const oecount *c1 = &cvec[p1 - 42];
1017 const oecount *c2 = &cvec[p2 - 42];
1018 return (c1->oecode == c2->oecode
1019 && c1->op == c2->op);
1022 /* Comparison function for qsort sorting oecount elements by count. */
1024 static int
1025 oecount_cmp (const void *p1, const void *p2)
1027 const oecount *c1 = (const oecount *)p1;
1028 const oecount *c2 = (const oecount *)p2;
1029 if (c1->cnt != c2->cnt)
1030 return c1->cnt - c2->cnt;
1031 else
1032 /* If counts are identical, use unique IDs to stabilize qsort. */
1033 return c1->id - c2->id;
1036 /* Return TRUE iff STMT represents a builtin call that raises OP
1037 to some exponent. */
1039 static bool
1040 stmt_is_power_of_op (gimple *stmt, tree op)
1042 if (!is_gimple_call (stmt))
1043 return false;
1045 switch (gimple_call_combined_fn (stmt))
1047 CASE_CFN_POW:
1048 CASE_CFN_POWI:
1049 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1051 default:
1052 return false;
1056 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1057 in place and return the result. Assumes that stmt_is_power_of_op
1058 was previously called for STMT and returned TRUE. */
1060 static HOST_WIDE_INT
1061 decrement_power (gimple *stmt)
1063 REAL_VALUE_TYPE c, cint;
1064 HOST_WIDE_INT power;
1065 tree arg1;
1067 switch (gimple_call_combined_fn (stmt))
1069 CASE_CFN_POW:
1070 arg1 = gimple_call_arg (stmt, 1);
1071 c = TREE_REAL_CST (arg1);
1072 power = real_to_integer (&c) - 1;
1073 real_from_integer (&cint, VOIDmode, power, SIGNED);
1074 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1075 return power;
1077 CASE_CFN_POWI:
1078 arg1 = gimple_call_arg (stmt, 1);
1079 power = TREE_INT_CST_LOW (arg1) - 1;
1080 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1081 return power;
1083 default:
1084 gcc_unreachable ();
1088 /* Find the single immediate use of STMT's LHS, and replace it
1089 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1090 replace *DEF with OP as well. */
1092 static void
1093 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1095 tree lhs;
1096 gimple *use_stmt;
1097 use_operand_p use;
1098 gimple_stmt_iterator gsi;
1100 if (is_gimple_call (stmt))
1101 lhs = gimple_call_lhs (stmt);
1102 else
1103 lhs = gimple_assign_lhs (stmt);
1105 gcc_assert (has_single_use (lhs));
1106 single_imm_use (lhs, &use, &use_stmt);
1107 if (lhs == *def)
1108 *def = op;
1109 SET_USE (use, op);
1110 if (TREE_CODE (op) != SSA_NAME)
1111 update_stmt (use_stmt);
1112 gsi = gsi_for_stmt (stmt);
1113 unlink_stmt_vdef (stmt);
1114 reassoc_remove_stmt (&gsi);
1115 release_defs (stmt);
1118 /* Walks the linear chain with result *DEF searching for an operation
1119 with operand OP and code OPCODE removing that from the chain. *DEF
1120 is updated if there is only one operand but no operation left. */
1122 static void
1123 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1125 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1129 tree name;
1131 if (opcode == MULT_EXPR
1132 && stmt_is_power_of_op (stmt, op))
1134 if (decrement_power (stmt) == 1)
1135 propagate_op_to_single_use (op, stmt, def);
1136 return;
1139 name = gimple_assign_rhs1 (stmt);
1141 /* If this is the operation we look for and one of the operands
1142 is ours simply propagate the other operand into the stmts
1143 single use. */
1144 if (gimple_assign_rhs_code (stmt) == opcode
1145 && (name == op
1146 || gimple_assign_rhs2 (stmt) == op))
1148 if (name == op)
1149 name = gimple_assign_rhs2 (stmt);
1150 propagate_op_to_single_use (name, stmt, def);
1151 return;
1154 /* We might have a multiply of two __builtin_pow* calls, and
1155 the operand might be hiding in the rightmost one. */
1156 if (opcode == MULT_EXPR
1157 && gimple_assign_rhs_code (stmt) == opcode
1158 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1159 && has_single_use (gimple_assign_rhs2 (stmt)))
1161 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1162 if (stmt_is_power_of_op (stmt2, op))
1164 if (decrement_power (stmt2) == 1)
1165 propagate_op_to_single_use (op, stmt2, def);
1166 return;
1170 /* Continue walking the chain. */
1171 gcc_assert (name != op
1172 && TREE_CODE (name) == SSA_NAME);
1173 stmt = SSA_NAME_DEF_STMT (name);
1175 while (1);
1178 /* Returns true if statement S1 dominates statement S2. Like
1179 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1181 static bool
1182 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1184 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1186 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1187 SSA_NAME. Assume it lives at the beginning of function and
1188 thus dominates everything. */
1189 if (!bb1 || s1 == s2)
1190 return true;
1192 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1193 if (!bb2)
1194 return false;
1196 if (bb1 == bb2)
1198 /* PHIs in the same basic block are assumed to be
1199 executed all in parallel, if only one stmt is a PHI,
1200 it dominates the other stmt in the same basic block. */
1201 if (gimple_code (s1) == GIMPLE_PHI)
1202 return true;
1204 if (gimple_code (s2) == GIMPLE_PHI)
1205 return false;
1207 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1209 if (gimple_uid (s1) < gimple_uid (s2))
1210 return true;
1212 if (gimple_uid (s1) > gimple_uid (s2))
1213 return false;
1215 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1216 unsigned int uid = gimple_uid (s1);
1217 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1219 gimple *s = gsi_stmt (gsi);
1220 if (gimple_uid (s) != uid)
1221 break;
1222 if (s == s2)
1223 return true;
1226 return false;
1229 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1232 /* Insert STMT after INSERT_POINT. */
1234 static void
1235 insert_stmt_after (gimple *stmt, gimple *insert_point)
1237 gimple_stmt_iterator gsi;
1238 basic_block bb;
1240 if (gimple_code (insert_point) == GIMPLE_PHI)
1241 bb = gimple_bb (insert_point);
1242 else if (!stmt_ends_bb_p (insert_point))
1244 gsi = gsi_for_stmt (insert_point);
1245 gimple_set_uid (stmt, gimple_uid (insert_point));
1246 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1247 return;
1249 else
1250 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1251 thus if it must end a basic block, it should be a call that can
1252 throw, or some assignment that can throw. If it throws, the LHS
1253 of it will not be initialized though, so only valid places using
1254 the SSA_NAME should be dominated by the fallthru edge. */
1255 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1256 gsi = gsi_after_labels (bb);
1257 if (gsi_end_p (gsi))
1259 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1260 gimple_set_uid (stmt,
1261 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1263 else
1264 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1265 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1268 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1269 the result. Places the statement after the definition of either
1270 OP1 or OP2. Returns the new statement. */
1272 static gimple *
1273 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1275 gimple *op1def = NULL, *op2def = NULL;
1276 gimple_stmt_iterator gsi;
1277 tree op;
1278 gassign *sum;
1280 /* Create the addition statement. */
1281 op = make_ssa_name (type);
1282 sum = gimple_build_assign (op, opcode, op1, op2);
1284 /* Find an insertion place and insert. */
1285 if (TREE_CODE (op1) == SSA_NAME)
1286 op1def = SSA_NAME_DEF_STMT (op1);
1287 if (TREE_CODE (op2) == SSA_NAME)
1288 op2def = SSA_NAME_DEF_STMT (op2);
1289 if ((!op1def || gimple_nop_p (op1def))
1290 && (!op2def || gimple_nop_p (op2def)))
1292 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1293 if (gsi_end_p (gsi))
1295 gimple_stmt_iterator gsi2
1296 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1297 gimple_set_uid (sum,
1298 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1300 else
1301 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1302 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1304 else
1306 gimple *insert_point;
1307 if ((!op1def || gimple_nop_p (op1def))
1308 || (op2def && !gimple_nop_p (op2def)
1309 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1310 insert_point = op2def;
1311 else
1312 insert_point = op1def;
1313 insert_stmt_after (sum, insert_point);
1315 update_stmt (sum);
1317 return sum;
1320 /* Perform un-distribution of divisions and multiplications.
1321 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1322 to (A + B) / X for real X.
1324 The algorithm is organized as follows.
1326 - First we walk the addition chain *OPS looking for summands that
1327 are defined by a multiplication or a real division. This results
1328 in the candidates bitmap with relevant indices into *OPS.
1330 - Second we build the chains of multiplications or divisions for
1331 these candidates, counting the number of occurrences of (operand, code)
1332 pairs in all of the candidates chains.
1334 - Third we sort the (operand, code) pairs by number of occurrence and
1335 process them starting with the pair with the most uses.
1337 * For each such pair we walk the candidates again to build a
1338 second candidate bitmap noting all multiplication/division chains
1339 that have at least one occurrence of (operand, code).
1341 * We build an alternate addition chain only covering these
1342 candidates with one (operand, code) operation removed from their
1343 multiplication/division chain.
1345 * The first candidate gets replaced by the alternate addition chain
1346 multiplied/divided by the operand.
1348 * All candidate chains get disabled for further processing and
1349 processing of (operand, code) pairs continues.
1351 The alternate addition chains built are re-processed by the main
1352 reassociation algorithm which allows optimizing a * x * y + b * y * x
1353 to (a + b ) * x * y in one invocation of the reassociation pass. */
1355 static bool
1356 undistribute_ops_list (enum tree_code opcode,
1357 vec<operand_entry *> *ops, struct loop *loop)
1359 unsigned int length = ops->length ();
1360 operand_entry *oe1;
1361 unsigned i, j;
1362 sbitmap candidates, candidates2;
1363 unsigned nr_candidates, nr_candidates2;
1364 sbitmap_iterator sbi0;
1365 vec<operand_entry *> *subops;
1366 bool changed = false;
1367 int next_oecount_id = 0;
1369 if (length <= 1
1370 || opcode != PLUS_EXPR)
1371 return false;
1373 /* Build a list of candidates to process. */
1374 candidates = sbitmap_alloc (length);
1375 bitmap_clear (candidates);
1376 nr_candidates = 0;
1377 FOR_EACH_VEC_ELT (*ops, i, oe1)
1379 enum tree_code dcode;
1380 gimple *oe1def;
1382 if (TREE_CODE (oe1->op) != SSA_NAME)
1383 continue;
1384 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1385 if (!is_gimple_assign (oe1def))
1386 continue;
1387 dcode = gimple_assign_rhs_code (oe1def);
1388 if ((dcode != MULT_EXPR
1389 && dcode != RDIV_EXPR)
1390 || !is_reassociable_op (oe1def, dcode, loop))
1391 continue;
1393 bitmap_set_bit (candidates, i);
1394 nr_candidates++;
1397 if (nr_candidates < 2)
1399 sbitmap_free (candidates);
1400 return false;
1403 if (dump_file && (dump_flags & TDF_DETAILS))
1405 fprintf (dump_file, "searching for un-distribute opportunities ");
1406 print_generic_expr (dump_file,
1407 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1408 fprintf (dump_file, " %d\n", nr_candidates);
1411 /* Build linearized sub-operand lists and the counting table. */
1412 cvec.create (0);
1414 hash_table<oecount_hasher> ctable (15);
1416 /* ??? Macro arguments cannot have multi-argument template types in
1417 them. This typedef is needed to workaround that limitation. */
1418 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1419 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1420 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1422 gimple *oedef;
1423 enum tree_code oecode;
1424 unsigned j;
1426 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1427 oecode = gimple_assign_rhs_code (oedef);
1428 linearize_expr_tree (&subops[i], oedef,
1429 associative_tree_code (oecode), false);
1431 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1433 oecount c;
1434 int *slot;
1435 int idx;
1436 c.oecode = oecode;
1437 c.cnt = 1;
1438 c.id = next_oecount_id++;
1439 c.op = oe1->op;
1440 cvec.safe_push (c);
1441 idx = cvec.length () + 41;
1442 slot = ctable.find_slot (idx, INSERT);
1443 if (!*slot)
1445 *slot = idx;
1447 else
1449 cvec.pop ();
1450 cvec[*slot - 42].cnt++;
1455 /* Sort the counting table. */
1456 cvec.qsort (oecount_cmp);
1458 if (dump_file && (dump_flags & TDF_DETAILS))
1460 oecount *c;
1461 fprintf (dump_file, "Candidates:\n");
1462 FOR_EACH_VEC_ELT (cvec, j, c)
1464 fprintf (dump_file, " %u %s: ", c->cnt,
1465 c->oecode == MULT_EXPR
1466 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1467 print_generic_expr (dump_file, c->op, 0);
1468 fprintf (dump_file, "\n");
1472 /* Process the (operand, code) pairs in order of most occurrence. */
1473 candidates2 = sbitmap_alloc (length);
1474 while (!cvec.is_empty ())
1476 oecount *c = &cvec.last ();
1477 if (c->cnt < 2)
1478 break;
1480 /* Now collect the operands in the outer chain that contain
1481 the common operand in their inner chain. */
1482 bitmap_clear (candidates2);
1483 nr_candidates2 = 0;
1484 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1486 gimple *oedef;
1487 enum tree_code oecode;
1488 unsigned j;
1489 tree op = (*ops)[i]->op;
1491 /* If we undistributed in this chain already this may be
1492 a constant. */
1493 if (TREE_CODE (op) != SSA_NAME)
1494 continue;
1496 oedef = SSA_NAME_DEF_STMT (op);
1497 oecode = gimple_assign_rhs_code (oedef);
1498 if (oecode != c->oecode)
1499 continue;
1501 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1503 if (oe1->op == c->op)
1505 bitmap_set_bit (candidates2, i);
1506 ++nr_candidates2;
1507 break;
1512 if (nr_candidates2 >= 2)
1514 operand_entry *oe1, *oe2;
1515 gimple *prod;
1516 int first = bitmap_first_set_bit (candidates2);
1518 /* Build the new addition chain. */
1519 oe1 = (*ops)[first];
1520 if (dump_file && (dump_flags & TDF_DETAILS))
1522 fprintf (dump_file, "Building (");
1523 print_generic_expr (dump_file, oe1->op, 0);
1525 zero_one_operation (&oe1->op, c->oecode, c->op);
1526 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1528 gimple *sum;
1529 oe2 = (*ops)[i];
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1532 fprintf (dump_file, " + ");
1533 print_generic_expr (dump_file, oe2->op, 0);
1535 zero_one_operation (&oe2->op, c->oecode, c->op);
1536 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1537 oe1->op, oe2->op, opcode);
1538 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1539 oe2->rank = 0;
1540 oe1->op = gimple_get_lhs (sum);
1543 /* Apply the multiplication/division. */
1544 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1545 oe1->op, c->op, c->oecode);
1546 if (dump_file && (dump_flags & TDF_DETAILS))
1548 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1549 print_generic_expr (dump_file, c->op, 0);
1550 fprintf (dump_file, "\n");
1553 /* Record it in the addition chain and disable further
1554 undistribution with this op. */
1555 oe1->op = gimple_assign_lhs (prod);
1556 oe1->rank = get_rank (oe1->op);
1557 subops[first].release ();
1559 changed = true;
1562 cvec.pop ();
1565 for (i = 0; i < ops->length (); ++i)
1566 subops[i].release ();
1567 free (subops);
1568 cvec.release ();
1569 sbitmap_free (candidates);
1570 sbitmap_free (candidates2);
1572 return changed;
1575 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1576 expression, examine the other OPS to see if any of them are comparisons
1577 of the same values, which we may be able to combine or eliminate.
1578 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1580 static bool
1581 eliminate_redundant_comparison (enum tree_code opcode,
1582 vec<operand_entry *> *ops,
1583 unsigned int currindex,
1584 operand_entry *curr)
1586 tree op1, op2;
1587 enum tree_code lcode, rcode;
1588 gimple *def1, *def2;
1589 int i;
1590 operand_entry *oe;
1592 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1593 return false;
1595 /* Check that CURR is a comparison. */
1596 if (TREE_CODE (curr->op) != SSA_NAME)
1597 return false;
1598 def1 = SSA_NAME_DEF_STMT (curr->op);
1599 if (!is_gimple_assign (def1))
1600 return false;
1601 lcode = gimple_assign_rhs_code (def1);
1602 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1603 return false;
1604 op1 = gimple_assign_rhs1 (def1);
1605 op2 = gimple_assign_rhs2 (def1);
1607 /* Now look for a similar comparison in the remaining OPS. */
1608 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1610 tree t;
1612 if (TREE_CODE (oe->op) != SSA_NAME)
1613 continue;
1614 def2 = SSA_NAME_DEF_STMT (oe->op);
1615 if (!is_gimple_assign (def2))
1616 continue;
1617 rcode = gimple_assign_rhs_code (def2);
1618 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1619 continue;
1621 /* If we got here, we have a match. See if we can combine the
1622 two comparisons. */
1623 if (opcode == BIT_IOR_EXPR)
1624 t = maybe_fold_or_comparisons (lcode, op1, op2,
1625 rcode, gimple_assign_rhs1 (def2),
1626 gimple_assign_rhs2 (def2));
1627 else
1628 t = maybe_fold_and_comparisons (lcode, op1, op2,
1629 rcode, gimple_assign_rhs1 (def2),
1630 gimple_assign_rhs2 (def2));
1631 if (!t)
1632 continue;
1634 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1635 always give us a boolean_type_node value back. If the original
1636 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1637 we need to convert. */
1638 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1639 t = fold_convert (TREE_TYPE (curr->op), t);
1641 if (TREE_CODE (t) != INTEGER_CST
1642 && !operand_equal_p (t, curr->op, 0))
1644 enum tree_code subcode;
1645 tree newop1, newop2;
1646 if (!COMPARISON_CLASS_P (t))
1647 continue;
1648 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1649 STRIP_USELESS_TYPE_CONVERSION (newop1);
1650 STRIP_USELESS_TYPE_CONVERSION (newop2);
1651 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1652 continue;
1655 if (dump_file && (dump_flags & TDF_DETAILS))
1657 fprintf (dump_file, "Equivalence: ");
1658 print_generic_expr (dump_file, curr->op, 0);
1659 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1660 print_generic_expr (dump_file, oe->op, 0);
1661 fprintf (dump_file, " -> ");
1662 print_generic_expr (dump_file, t, 0);
1663 fprintf (dump_file, "\n");
1666 /* Now we can delete oe, as it has been subsumed by the new combined
1667 expression t. */
1668 ops->ordered_remove (i);
1669 reassociate_stats.ops_eliminated ++;
1671 /* If t is the same as curr->op, we're done. Otherwise we must
1672 replace curr->op with t. Special case is if we got a constant
1673 back, in which case we add it to the end instead of in place of
1674 the current entry. */
1675 if (TREE_CODE (t) == INTEGER_CST)
1677 ops->ordered_remove (currindex);
1678 add_to_ops_vec (ops, t);
1680 else if (!operand_equal_p (t, curr->op, 0))
1682 gimple *sum;
1683 enum tree_code subcode;
1684 tree newop1;
1685 tree newop2;
1686 gcc_assert (COMPARISON_CLASS_P (t));
1687 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1688 STRIP_USELESS_TYPE_CONVERSION (newop1);
1689 STRIP_USELESS_TYPE_CONVERSION (newop2);
1690 gcc_checking_assert (is_gimple_val (newop1)
1691 && is_gimple_val (newop2));
1692 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1693 curr->op = gimple_get_lhs (sum);
1695 return true;
1698 return false;
1701 /* Perform various identities and other optimizations on the list of
1702 operand entries, stored in OPS. The tree code for the binary
1703 operation between all the operands is OPCODE. */
1705 static void
1706 optimize_ops_list (enum tree_code opcode,
1707 vec<operand_entry *> *ops)
1709 unsigned int length = ops->length ();
1710 unsigned int i;
1711 operand_entry *oe;
1712 operand_entry *oelast = NULL;
1713 bool iterate = false;
1715 if (length == 1)
1716 return;
1718 oelast = ops->last ();
1720 /* If the last two are constants, pop the constants off, merge them
1721 and try the next two. */
1722 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1724 operand_entry *oelm1 = (*ops)[length - 2];
1726 if (oelm1->rank == 0
1727 && is_gimple_min_invariant (oelm1->op)
1728 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1729 TREE_TYPE (oelast->op)))
1731 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1732 oelm1->op, oelast->op);
1734 if (folded && is_gimple_min_invariant (folded))
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file, "Merging constants\n");
1739 ops->pop ();
1740 ops->pop ();
1742 add_to_ops_vec (ops, folded);
1743 reassociate_stats.constants_eliminated++;
1745 optimize_ops_list (opcode, ops);
1746 return;
1751 eliminate_using_constants (opcode, ops);
1752 oelast = NULL;
1754 for (i = 0; ops->iterate (i, &oe);)
1756 bool done = false;
1758 if (eliminate_not_pairs (opcode, ops, i, oe))
1759 return;
1760 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1761 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1762 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1764 if (done)
1765 return;
1766 iterate = true;
1767 oelast = NULL;
1768 continue;
1770 oelast = oe;
1771 i++;
1774 length = ops->length ();
1775 oelast = ops->last ();
1777 if (iterate)
1778 optimize_ops_list (opcode, ops);
1781 /* The following functions are subroutines to optimize_range_tests and allow
1782 it to try to change a logical combination of comparisons into a range
1783 test.
1785 For example, both
1786 X == 2 || X == 5 || X == 3 || X == 4
1788 X >= 2 && X <= 5
1789 are converted to
1790 (unsigned) (X - 2) <= 3
1792 For more information see comments above fold_test_range in fold-const.c,
1793 this implementation is for GIMPLE. */
1795 struct range_entry
1797 tree exp;
1798 tree low;
1799 tree high;
1800 bool in_p;
1801 bool strict_overflow_p;
1802 unsigned int idx, next;
1805 /* This is similar to make_range in fold-const.c, but on top of
1806 GIMPLE instead of trees. If EXP is non-NULL, it should be
1807 an SSA_NAME and STMT argument is ignored, otherwise STMT
1808 argument should be a GIMPLE_COND. */
1810 static void
1811 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1813 int in_p;
1814 tree low, high;
1815 bool is_bool, strict_overflow_p;
1817 r->exp = NULL_TREE;
1818 r->in_p = false;
1819 r->strict_overflow_p = false;
1820 r->low = NULL_TREE;
1821 r->high = NULL_TREE;
1822 if (exp != NULL_TREE
1823 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1824 return;
1826 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1827 and see if we can refine the range. Some of the cases below may not
1828 happen, but it doesn't seem worth worrying about this. We "continue"
1829 the outer loop when we've changed something; otherwise we "break"
1830 the switch, which will "break" the while. */
1831 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1832 high = low;
1833 in_p = 0;
1834 strict_overflow_p = false;
1835 is_bool = false;
1836 if (exp == NULL_TREE)
1837 is_bool = true;
1838 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1840 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1841 is_bool = true;
1842 else
1843 return;
1845 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1846 is_bool = true;
1848 while (1)
1850 enum tree_code code;
1851 tree arg0, arg1, exp_type;
1852 tree nexp;
1853 location_t loc;
1855 if (exp != NULL_TREE)
1857 if (TREE_CODE (exp) != SSA_NAME
1858 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1859 break;
1861 stmt = SSA_NAME_DEF_STMT (exp);
1862 if (!is_gimple_assign (stmt))
1863 break;
1865 code = gimple_assign_rhs_code (stmt);
1866 arg0 = gimple_assign_rhs1 (stmt);
1867 arg1 = gimple_assign_rhs2 (stmt);
1868 exp_type = TREE_TYPE (exp);
1870 else
1872 code = gimple_cond_code (stmt);
1873 arg0 = gimple_cond_lhs (stmt);
1874 arg1 = gimple_cond_rhs (stmt);
1875 exp_type = boolean_type_node;
1878 if (TREE_CODE (arg0) != SSA_NAME)
1879 break;
1880 loc = gimple_location (stmt);
1881 switch (code)
1883 case BIT_NOT_EXPR:
1884 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1885 /* Ensure the range is either +[-,0], +[0,0],
1886 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1887 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1888 or similar expression of unconditional true or
1889 false, it should not be negated. */
1890 && ((high && integer_zerop (high))
1891 || (low && integer_onep (low))))
1893 in_p = !in_p;
1894 exp = arg0;
1895 continue;
1897 break;
1898 case SSA_NAME:
1899 exp = arg0;
1900 continue;
1901 CASE_CONVERT:
1902 if (is_bool)
1903 goto do_default;
1904 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1906 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1907 is_bool = true;
1908 else
1909 return;
1911 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1912 is_bool = true;
1913 goto do_default;
1914 case EQ_EXPR:
1915 case NE_EXPR:
1916 case LT_EXPR:
1917 case LE_EXPR:
1918 case GE_EXPR:
1919 case GT_EXPR:
1920 is_bool = true;
1921 /* FALLTHRU */
1922 default:
1923 if (!is_bool)
1924 return;
1925 do_default:
1926 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1927 &low, &high, &in_p,
1928 &strict_overflow_p);
1929 if (nexp != NULL_TREE)
1931 exp = nexp;
1932 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1933 continue;
1935 break;
1937 break;
1939 if (is_bool)
1941 r->exp = exp;
1942 r->in_p = in_p;
1943 r->low = low;
1944 r->high = high;
1945 r->strict_overflow_p = strict_overflow_p;
1949 /* Comparison function for qsort. Sort entries
1950 without SSA_NAME exp first, then with SSA_NAMEs sorted
1951 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1952 by increasing ->low and if ->low is the same, by increasing
1953 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1954 maximum. */
1956 static int
1957 range_entry_cmp (const void *a, const void *b)
1959 const struct range_entry *p = (const struct range_entry *) a;
1960 const struct range_entry *q = (const struct range_entry *) b;
1962 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1964 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1966 /* Group range_entries for the same SSA_NAME together. */
1967 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1968 return -1;
1969 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1970 return 1;
1971 /* If ->low is different, NULL low goes first, then by
1972 ascending low. */
1973 if (p->low != NULL_TREE)
1975 if (q->low != NULL_TREE)
1977 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1978 p->low, q->low);
1979 if (tem && integer_onep (tem))
1980 return -1;
1981 tem = fold_binary (GT_EXPR, boolean_type_node,
1982 p->low, q->low);
1983 if (tem && integer_onep (tem))
1984 return 1;
1986 else
1987 return 1;
1989 else if (q->low != NULL_TREE)
1990 return -1;
1991 /* If ->high is different, NULL high goes last, before that by
1992 ascending high. */
1993 if (p->high != NULL_TREE)
1995 if (q->high != NULL_TREE)
1997 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1998 p->high, q->high);
1999 if (tem && integer_onep (tem))
2000 return -1;
2001 tem = fold_binary (GT_EXPR, boolean_type_node,
2002 p->high, q->high);
2003 if (tem && integer_onep (tem))
2004 return 1;
2006 else
2007 return -1;
2009 else if (q->high != NULL_TREE)
2010 return 1;
2011 /* If both ranges are the same, sort below by ascending idx. */
2013 else
2014 return 1;
2016 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2017 return -1;
2019 if (p->idx < q->idx)
2020 return -1;
2021 else
2023 gcc_checking_assert (p->idx > q->idx);
2024 return 1;
2028 /* Helper routine of optimize_range_test.
2029 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2030 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2031 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2032 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2033 an array of COUNT pointers to other ranges. Return
2034 true if the range merge has been successful.
2035 If OPCODE is ERROR_MARK, this is called from within
2036 maybe_optimize_range_tests and is performing inter-bb range optimization.
2037 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2038 oe->rank. */
2040 static bool
2041 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2042 struct range_entry **otherrangep,
2043 unsigned int count, enum tree_code opcode,
2044 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2045 bool in_p, tree low, tree high, bool strict_overflow_p)
2047 operand_entry *oe = (*ops)[range->idx];
2048 tree op = oe->op;
2049 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) :
2050 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2051 location_t loc = gimple_location (stmt);
2052 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2053 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2054 in_p, low, high);
2055 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2056 gimple_stmt_iterator gsi;
2057 unsigned int i;
2059 if (tem == NULL_TREE)
2060 return false;
2062 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2063 warning_at (loc, OPT_Wstrict_overflow,
2064 "assuming signed overflow does not occur "
2065 "when simplifying range test");
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2069 struct range_entry *r;
2070 fprintf (dump_file, "Optimizing range tests ");
2071 print_generic_expr (dump_file, range->exp, 0);
2072 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2073 print_generic_expr (dump_file, range->low, 0);
2074 fprintf (dump_file, ", ");
2075 print_generic_expr (dump_file, range->high, 0);
2076 fprintf (dump_file, "]");
2077 for (i = 0; i < count; i++)
2079 if (otherrange)
2080 r = otherrange + i;
2081 else
2082 r = otherrangep[i];
2083 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2084 print_generic_expr (dump_file, r->low, 0);
2085 fprintf (dump_file, ", ");
2086 print_generic_expr (dump_file, r->high, 0);
2087 fprintf (dump_file, "]");
2089 fprintf (dump_file, "\n into ");
2090 print_generic_expr (dump_file, tem, 0);
2091 fprintf (dump_file, "\n");
2094 if (opcode == BIT_IOR_EXPR
2095 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2096 tem = invert_truthvalue_loc (loc, tem);
2098 tem = fold_convert_loc (loc, optype, tem);
2099 gsi = gsi_for_stmt (stmt);
2100 unsigned int uid = gimple_uid (stmt);
2101 /* In rare cases range->exp can be equal to lhs of stmt.
2102 In that case we have to insert after the stmt rather then before
2103 it. If stmt is a PHI, insert it at the start of the basic block. */
2104 if (op != range->exp)
2106 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2107 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2108 GSI_SAME_STMT);
2109 gsi_prev (&gsi);
2111 else if (gimple_code (stmt) != GIMPLE_PHI)
2113 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2114 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2115 GSI_CONTINUE_LINKING);
2117 else
2119 gsi = gsi_after_labels (gimple_bb (stmt));
2120 if (!gsi_end_p (gsi))
2121 uid = gimple_uid (gsi_stmt (gsi));
2122 else
2124 gsi = gsi_start_bb (gimple_bb (stmt));
2125 uid = 1;
2126 while (!gsi_end_p (gsi))
2128 uid = gimple_uid (gsi_stmt (gsi));
2129 gsi_next (&gsi);
2132 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2133 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2134 GSI_SAME_STMT);
2135 if (gsi_end_p (gsi))
2136 gsi = gsi_last_bb (gimple_bb (stmt));
2137 else
2138 gsi_prev (&gsi);
2140 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2141 if (gimple_uid (gsi_stmt (gsi)))
2142 break;
2143 else
2144 gimple_set_uid (gsi_stmt (gsi), uid);
2146 oe->op = tem;
2147 range->exp = exp;
2148 range->low = low;
2149 range->high = high;
2150 range->in_p = in_p;
2151 range->strict_overflow_p = false;
2153 for (i = 0; i < count; i++)
2155 if (otherrange)
2156 range = otherrange + i;
2157 else
2158 range = otherrangep[i];
2159 oe = (*ops)[range->idx];
2160 /* Now change all the other range test immediate uses, so that
2161 those tests will be optimized away. */
2162 if (opcode == ERROR_MARK)
2164 if (oe->op)
2165 oe->op = build_int_cst (TREE_TYPE (oe->op),
2166 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2167 else
2168 oe->op = (oe->rank == BIT_IOR_EXPR
2169 ? boolean_false_node : boolean_true_node);
2171 else
2172 oe->op = error_mark_node;
2173 range->exp = NULL_TREE;
2175 return true;
2178 /* Optimize X == CST1 || X == CST2
2179 if popcount (CST1 ^ CST2) == 1 into
2180 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181 Similarly for ranges. E.g.
2182 X != 2 && X != 3 && X != 10 && X != 11
2183 will be transformed by the previous optimization into
2184 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2185 and this loop can transform that into
2186 !(((X & ~8) - 2U) <= 1U). */
2188 static bool
2189 optimize_range_tests_xor (enum tree_code opcode, tree type,
2190 tree lowi, tree lowj, tree highi, tree highj,
2191 vec<operand_entry *> *ops,
2192 struct range_entry *rangei,
2193 struct range_entry *rangej)
2195 tree lowxor, highxor, tem, exp;
2196 /* Check lowi ^ lowj == highi ^ highj and
2197 popcount (lowi ^ lowj) == 1. */
2198 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2199 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2200 return false;
2201 if (!integer_pow2p (lowxor))
2202 return false;
2203 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2204 if (!tree_int_cst_equal (lowxor, highxor))
2205 return false;
2207 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2208 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2209 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2210 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2211 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2212 NULL, rangei->in_p, lowj, highj,
2213 rangei->strict_overflow_p
2214 || rangej->strict_overflow_p))
2215 return true;
2216 return false;
2219 /* Optimize X == CST1 || X == CST2
2220 if popcount (CST2 - CST1) == 1 into
2221 ((X - CST1) & ~(CST2 - CST1)) == 0.
2222 Similarly for ranges. E.g.
2223 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2224 || X == 75 || X == 45
2225 will be transformed by the previous optimization into
2226 (X - 43U) <= 3U || (X - 75U) <= 3U
2227 and this loop can transform that into
2228 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2229 static bool
2230 optimize_range_tests_diff (enum tree_code opcode, tree type,
2231 tree lowi, tree lowj, tree highi, tree highj,
2232 vec<operand_entry *> *ops,
2233 struct range_entry *rangei,
2234 struct range_entry *rangej)
2236 tree tem1, tem2, mask;
2237 /* Check highi - lowi == highj - lowj. */
2238 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2239 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2240 return false;
2241 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2242 if (!tree_int_cst_equal (tem1, tem2))
2243 return false;
2244 /* Check popcount (lowj - lowi) == 1. */
2245 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2246 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2247 return false;
2248 if (!integer_pow2p (tem1))
2249 return false;
2251 type = unsigned_type_for (type);
2252 tem1 = fold_convert (type, tem1);
2253 tem2 = fold_convert (type, tem2);
2254 lowi = fold_convert (type, lowi);
2255 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2256 tem1 = fold_binary (MINUS_EXPR, type,
2257 fold_convert (type, rangei->exp), lowi);
2258 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2259 lowj = build_int_cst (type, 0);
2260 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2261 NULL, rangei->in_p, lowj, tem2,
2262 rangei->strict_overflow_p
2263 || rangej->strict_overflow_p))
2264 return true;
2265 return false;
2268 /* It does some common checks for function optimize_range_tests_xor and
2269 optimize_range_tests_diff.
2270 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2271 Else it calls optimize_range_tests_diff. */
2273 static bool
2274 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2275 bool optimize_xor, vec<operand_entry *> *ops,
2276 struct range_entry *ranges)
2278 int i, j;
2279 bool any_changes = false;
2280 for (i = first; i < length; i++)
2282 tree lowi, highi, lowj, highj, type, tem;
2284 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2285 continue;
2286 type = TREE_TYPE (ranges[i].exp);
2287 if (!INTEGRAL_TYPE_P (type))
2288 continue;
2289 lowi = ranges[i].low;
2290 if (lowi == NULL_TREE)
2291 lowi = TYPE_MIN_VALUE (type);
2292 highi = ranges[i].high;
2293 if (highi == NULL_TREE)
2294 continue;
2295 for (j = i + 1; j < length && j < i + 64; j++)
2297 bool changes;
2298 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2299 continue;
2300 lowj = ranges[j].low;
2301 if (lowj == NULL_TREE)
2302 continue;
2303 highj = ranges[j].high;
2304 if (highj == NULL_TREE)
2305 highj = TYPE_MAX_VALUE (type);
2306 /* Check lowj > highi. */
2307 tem = fold_binary (GT_EXPR, boolean_type_node,
2308 lowj, highi);
2309 if (tem == NULL_TREE || !integer_onep (tem))
2310 continue;
2311 if (optimize_xor)
2312 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2313 highi, highj, ops,
2314 ranges + i, ranges + j);
2315 else
2316 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2317 highi, highj, ops,
2318 ranges + i, ranges + j);
2319 if (changes)
2321 any_changes = true;
2322 break;
2326 return any_changes;
2329 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2330 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2331 EXP on success, NULL otherwise. */
2333 static tree
2334 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2335 wide_int *mask, tree *totallowp)
2337 tree tem = int_const_binop (MINUS_EXPR, high, low);
2338 if (tem == NULL_TREE
2339 || TREE_CODE (tem) != INTEGER_CST
2340 || TREE_OVERFLOW (tem)
2341 || tree_int_cst_sgn (tem) == -1
2342 || compare_tree_int (tem, prec) != -1)
2343 return NULL_TREE;
2345 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2346 *mask = wi::shifted_mask (0, max, false, prec);
2347 if (TREE_CODE (exp) == BIT_AND_EXPR
2348 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2350 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2351 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2352 if (wi::popcount (msk) == 1
2353 && wi::ltu_p (msk, prec - max))
2355 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2356 max += msk.to_uhwi ();
2357 exp = TREE_OPERAND (exp, 0);
2358 if (integer_zerop (low)
2359 && TREE_CODE (exp) == PLUS_EXPR
2360 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2362 tree ret = TREE_OPERAND (exp, 0);
2363 STRIP_NOPS (ret);
2364 widest_int bias
2365 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2366 TYPE_PRECISION (TREE_TYPE (low))));
2367 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2368 if (totallowp)
2370 *totallowp = tbias;
2371 return ret;
2373 else if (!tree_int_cst_lt (totallow, tbias))
2374 return NULL_TREE;
2375 bias = wi::to_widest (tbias);
2376 bias -= wi::to_widest (totallow);
2377 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2379 *mask = wi::lshift (*mask, bias);
2380 return ret;
2385 if (totallowp)
2386 return exp;
2387 if (!tree_int_cst_lt (totallow, low))
2388 return exp;
2389 tem = int_const_binop (MINUS_EXPR, low, totallow);
2390 if (tem == NULL_TREE
2391 || TREE_CODE (tem) != INTEGER_CST
2392 || TREE_OVERFLOW (tem)
2393 || compare_tree_int (tem, prec - max) == 1)
2394 return NULL_TREE;
2396 *mask = wi::lshift (*mask, wi::to_widest (tem));
2397 return exp;
2400 /* Attempt to optimize small range tests using bit test.
2401 E.g.
2402 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2403 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2404 has been by earlier optimizations optimized into:
2405 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2406 As all the 43 through 82 range is less than 64 numbers,
2407 for 64-bit word targets optimize that into:
2408 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2410 static bool
2411 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2412 vec<operand_entry *> *ops,
2413 struct range_entry *ranges)
2415 int i, j;
2416 bool any_changes = false;
2417 int prec = GET_MODE_BITSIZE (word_mode);
2418 auto_vec<struct range_entry *, 64> candidates;
2420 for (i = first; i < length - 2; i++)
2422 tree lowi, highi, lowj, highj, type;
2424 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2425 continue;
2426 type = TREE_TYPE (ranges[i].exp);
2427 if (!INTEGRAL_TYPE_P (type))
2428 continue;
2429 lowi = ranges[i].low;
2430 if (lowi == NULL_TREE)
2431 lowi = TYPE_MIN_VALUE (type);
2432 highi = ranges[i].high;
2433 if (highi == NULL_TREE)
2434 continue;
2435 wide_int mask;
2436 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2437 highi, &mask, &lowi);
2438 if (exp == NULL_TREE)
2439 continue;
2440 bool strict_overflow_p = ranges[i].strict_overflow_p;
2441 candidates.truncate (0);
2442 int end = MIN (i + 64, length);
2443 for (j = i + 1; j < end; j++)
2445 tree exp2;
2446 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2447 continue;
2448 if (ranges[j].exp == exp)
2450 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2452 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2453 if (exp2 == exp)
2455 else if (TREE_CODE (exp2) == PLUS_EXPR)
2457 exp2 = TREE_OPERAND (exp2, 0);
2458 STRIP_NOPS (exp2);
2459 if (exp2 != exp)
2460 continue;
2462 else
2463 continue;
2465 else
2466 continue;
2467 lowj = ranges[j].low;
2468 if (lowj == NULL_TREE)
2469 continue;
2470 highj = ranges[j].high;
2471 if (highj == NULL_TREE)
2472 highj = TYPE_MAX_VALUE (type);
2473 wide_int mask2;
2474 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2475 highj, &mask2, NULL);
2476 if (exp2 != exp)
2477 continue;
2478 mask |= mask2;
2479 strict_overflow_p |= ranges[j].strict_overflow_p;
2480 candidates.safe_push (&ranges[j]);
2483 /* If we need otherwise 3 or more comparisons, use a bit test. */
2484 if (candidates.length () >= 2)
2486 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2487 wi::to_widest (lowi)
2488 + prec - 1 - wi::clz (mask));
2489 operand_entry *oe = (*ops)[ranges[i].idx];
2490 tree op = oe->op;
2491 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2492 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2493 location_t loc = gimple_location (stmt);
2494 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2496 /* See if it isn't cheaper to pretend the minimum value of the
2497 range is 0, if maximum value is small enough.
2498 We can avoid then subtraction of the minimum value, but the
2499 mask constant could be perhaps more expensive. */
2500 if (compare_tree_int (lowi, 0) > 0
2501 && compare_tree_int (high, prec) < 0)
2503 int cost_diff;
2504 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2505 rtx reg = gen_raw_REG (word_mode, 10000);
2506 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2507 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2508 GEN_INT (-m)), speed_p);
2509 rtx r = immed_wide_int_const (mask, word_mode);
2510 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2511 word_mode, speed_p);
2512 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2513 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2514 word_mode, speed_p);
2515 if (cost_diff > 0)
2517 mask = wi::lshift (mask, m);
2518 lowi = build_zero_cst (TREE_TYPE (lowi));
2522 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2523 false, lowi, high);
2524 if (tem == NULL_TREE || is_gimple_val (tem))
2525 continue;
2526 tree etype = unsigned_type_for (TREE_TYPE (exp));
2527 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2528 fold_convert_loc (loc, etype, exp),
2529 fold_convert_loc (loc, etype, lowi));
2530 exp = fold_convert_loc (loc, integer_type_node, exp);
2531 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2532 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2533 build_int_cst (word_type, 1), exp);
2534 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2535 wide_int_to_tree (word_type, mask));
2536 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2537 build_zero_cst (word_type));
2538 if (is_gimple_val (exp))
2539 continue;
2541 /* The shift might have undefined behavior if TEM is true,
2542 but reassociate_bb isn't prepared to have basic blocks
2543 split when it is running. So, temporarily emit a code
2544 with BIT_IOR_EXPR instead of &&, and fix it up in
2545 branch_fixup. */
2546 gimple_seq seq;
2547 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2548 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2549 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2550 gimple_seq seq2;
2551 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2552 gimple_seq_add_seq_without_update (&seq, seq2);
2553 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2554 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2555 gimple *g = gimple_build_assign (make_ssa_name (optype),
2556 BIT_IOR_EXPR, tem, exp);
2557 gimple_set_location (g, loc);
2558 gimple_seq_add_stmt_without_update (&seq, g);
2559 exp = gimple_assign_lhs (g);
2560 tree val = build_zero_cst (optype);
2561 if (update_range_test (&ranges[i], NULL, candidates.address (),
2562 candidates.length (), opcode, ops, exp,
2563 seq, false, val, val, strict_overflow_p))
2565 any_changes = true;
2566 reassoc_branch_fixups.safe_push (tem);
2568 else
2569 gimple_seq_discard (seq);
2572 return any_changes;
2575 /* Optimize range tests, similarly how fold_range_test optimizes
2576 it on trees. The tree code for the binary
2577 operation between all the operands is OPCODE.
2578 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2579 maybe_optimize_range_tests for inter-bb range optimization.
2580 In that case if oe->op is NULL, oe->id is bb->index whose
2581 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2582 the actual opcode. */
2584 static bool
2585 optimize_range_tests (enum tree_code opcode,
2586 vec<operand_entry *> *ops)
2588 unsigned int length = ops->length (), i, j, first;
2589 operand_entry *oe;
2590 struct range_entry *ranges;
2591 bool any_changes = false;
2593 if (length == 1)
2594 return false;
2596 ranges = XNEWVEC (struct range_entry, length);
2597 for (i = 0; i < length; i++)
2599 oe = (*ops)[i];
2600 ranges[i].idx = i;
2601 init_range_entry (ranges + i, oe->op,
2602 oe->op ? NULL :
2603 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2604 /* For | invert it now, we will invert it again before emitting
2605 the optimized expression. */
2606 if (opcode == BIT_IOR_EXPR
2607 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2608 ranges[i].in_p = !ranges[i].in_p;
2611 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2612 for (i = 0; i < length; i++)
2613 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2614 break;
2616 /* Try to merge ranges. */
2617 for (first = i; i < length; i++)
2619 tree low = ranges[i].low;
2620 tree high = ranges[i].high;
2621 int in_p = ranges[i].in_p;
2622 bool strict_overflow_p = ranges[i].strict_overflow_p;
2623 int update_fail_count = 0;
2625 for (j = i + 1; j < length; j++)
2627 if (ranges[i].exp != ranges[j].exp)
2628 break;
2629 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2630 ranges[j].in_p, ranges[j].low, ranges[j].high))
2631 break;
2632 strict_overflow_p |= ranges[j].strict_overflow_p;
2635 if (j == i + 1)
2636 continue;
2638 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2639 opcode, ops, ranges[i].exp, NULL, in_p,
2640 low, high, strict_overflow_p))
2642 i = j - 1;
2643 any_changes = true;
2645 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2646 while update_range_test would fail. */
2647 else if (update_fail_count == 64)
2648 i = j - 1;
2649 else
2650 ++update_fail_count;
2653 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2654 ops, ranges);
2656 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2657 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2658 ops, ranges);
2659 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2660 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2661 ops, ranges);
2663 if (any_changes && opcode != ERROR_MARK)
2665 j = 0;
2666 FOR_EACH_VEC_ELT (*ops, i, oe)
2668 if (oe->op == error_mark_node)
2669 continue;
2670 else if (i != j)
2671 (*ops)[j] = oe;
2672 j++;
2674 ops->truncate (j);
2677 XDELETEVEC (ranges);
2678 return any_changes;
2681 /* Return true if STMT is a cast like:
2682 <bb N>:
2684 _123 = (int) _234;
2686 <bb M>:
2687 # _345 = PHI <_123(N), 1(...), 1(...)>
2688 where _234 has bool type, _123 has single use and
2689 bb N has a single successor M. This is commonly used in
2690 the last block of a range test. */
2692 static bool
2693 final_range_test_p (gimple *stmt)
2695 basic_block bb, rhs_bb;
2696 edge e;
2697 tree lhs, rhs;
2698 use_operand_p use_p;
2699 gimple *use_stmt;
2701 if (!gimple_assign_cast_p (stmt))
2702 return false;
2703 bb = gimple_bb (stmt);
2704 if (!single_succ_p (bb))
2705 return false;
2706 e = single_succ_edge (bb);
2707 if (e->flags & EDGE_COMPLEX)
2708 return false;
2710 lhs = gimple_assign_lhs (stmt);
2711 rhs = gimple_assign_rhs1 (stmt);
2712 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2713 || TREE_CODE (rhs) != SSA_NAME
2714 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2715 return false;
2717 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2718 if (!single_imm_use (lhs, &use_p, &use_stmt))
2719 return false;
2721 if (gimple_code (use_stmt) != GIMPLE_PHI
2722 || gimple_bb (use_stmt) != e->dest)
2723 return false;
2725 /* And that the rhs is defined in the same loop. */
2726 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2727 if (rhs_bb == NULL
2728 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2729 return false;
2731 return true;
2734 /* Return true if BB is suitable basic block for inter-bb range test
2735 optimization. If BACKWARD is true, BB should be the only predecessor
2736 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2737 or compared with to find a common basic block to which all conditions
2738 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2739 be the only predecessor of BB. */
2741 static bool
2742 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2743 bool backward)
2745 edge_iterator ei, ei2;
2746 edge e, e2;
2747 gimple *stmt;
2748 gphi_iterator gsi;
2749 bool other_edge_seen = false;
2750 bool is_cond;
2752 if (test_bb == bb)
2753 return false;
2754 /* Check last stmt first. */
2755 stmt = last_stmt (bb);
2756 if (stmt == NULL
2757 || (gimple_code (stmt) != GIMPLE_COND
2758 && (backward || !final_range_test_p (stmt)))
2759 || gimple_visited_p (stmt)
2760 || stmt_could_throw_p (stmt)
2761 || *other_bb == bb)
2762 return false;
2763 is_cond = gimple_code (stmt) == GIMPLE_COND;
2764 if (is_cond)
2766 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2767 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2768 to *OTHER_BB (if not set yet, try to find it out). */
2769 if (EDGE_COUNT (bb->succs) != 2)
2770 return false;
2771 FOR_EACH_EDGE (e, ei, bb->succs)
2773 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2774 return false;
2775 if (e->dest == test_bb)
2777 if (backward)
2778 continue;
2779 else
2780 return false;
2782 if (e->dest == bb)
2783 return false;
2784 if (*other_bb == NULL)
2786 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2787 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2788 return false;
2789 else if (e->dest == e2->dest)
2790 *other_bb = e->dest;
2791 if (*other_bb == NULL)
2792 return false;
2794 if (e->dest == *other_bb)
2795 other_edge_seen = true;
2796 else if (backward)
2797 return false;
2799 if (*other_bb == NULL || !other_edge_seen)
2800 return false;
2802 else if (single_succ (bb) != *other_bb)
2803 return false;
2805 /* Now check all PHIs of *OTHER_BB. */
2806 e = find_edge (bb, *other_bb);
2807 e2 = find_edge (test_bb, *other_bb);
2808 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2810 gphi *phi = gsi.phi ();
2811 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2812 corresponding to BB and TEST_BB predecessor must be the same. */
2813 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2814 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2816 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2817 one of the PHIs should have the lhs of the last stmt in
2818 that block as PHI arg and that PHI should have 0 or 1
2819 corresponding to it in all other range test basic blocks
2820 considered. */
2821 if (!is_cond)
2823 if (gimple_phi_arg_def (phi, e->dest_idx)
2824 == gimple_assign_lhs (stmt)
2825 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2826 || integer_onep (gimple_phi_arg_def (phi,
2827 e2->dest_idx))))
2828 continue;
2830 else
2832 gimple *test_last = last_stmt (test_bb);
2833 if (gimple_code (test_last) != GIMPLE_COND
2834 && gimple_phi_arg_def (phi, e2->dest_idx)
2835 == gimple_assign_lhs (test_last)
2836 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2837 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2838 continue;
2841 return false;
2844 return true;
2847 /* Return true if BB doesn't have side-effects that would disallow
2848 range test optimization, all SSA_NAMEs set in the bb are consumed
2849 in the bb and there are no PHIs. */
2851 static bool
2852 no_side_effect_bb (basic_block bb)
2854 gimple_stmt_iterator gsi;
2855 gimple *last;
2857 if (!gimple_seq_empty_p (phi_nodes (bb)))
2858 return false;
2859 last = last_stmt (bb);
2860 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2862 gimple *stmt = gsi_stmt (gsi);
2863 tree lhs;
2864 imm_use_iterator imm_iter;
2865 use_operand_p use_p;
2867 if (is_gimple_debug (stmt))
2868 continue;
2869 if (gimple_has_side_effects (stmt))
2870 return false;
2871 if (stmt == last)
2872 return true;
2873 if (!is_gimple_assign (stmt))
2874 return false;
2875 lhs = gimple_assign_lhs (stmt);
2876 if (TREE_CODE (lhs) != SSA_NAME)
2877 return false;
2878 if (gimple_assign_rhs_could_trap_p (stmt))
2879 return false;
2880 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2882 gimple *use_stmt = USE_STMT (use_p);
2883 if (is_gimple_debug (use_stmt))
2884 continue;
2885 if (gimple_bb (use_stmt) != bb)
2886 return false;
2889 return false;
2892 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2893 return true and fill in *OPS recursively. */
2895 static bool
2896 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2897 struct loop *loop)
2899 gimple *stmt = SSA_NAME_DEF_STMT (var);
2900 tree rhs[2];
2901 int i;
2903 if (!is_reassociable_op (stmt, code, loop))
2904 return false;
2906 rhs[0] = gimple_assign_rhs1 (stmt);
2907 rhs[1] = gimple_assign_rhs2 (stmt);
2908 gimple_set_visited (stmt, true);
2909 for (i = 0; i < 2; i++)
2910 if (TREE_CODE (rhs[i]) == SSA_NAME
2911 && !get_ops (rhs[i], code, ops, loop)
2912 && has_single_use (rhs[i]))
2914 operand_entry *oe = operand_entry_pool.allocate ();
2916 oe->op = rhs[i];
2917 oe->rank = code;
2918 oe->id = 0;
2919 oe->count = 1;
2920 ops->safe_push (oe);
2922 return true;
2925 /* Find the ops that were added by get_ops starting from VAR, see if
2926 they were changed during update_range_test and if yes, create new
2927 stmts. */
2929 static tree
2930 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
2931 unsigned int *pidx, struct loop *loop)
2933 gimple *stmt = SSA_NAME_DEF_STMT (var);
2934 tree rhs[4];
2935 int i;
2937 if (!is_reassociable_op (stmt, code, loop))
2938 return NULL;
2940 rhs[0] = gimple_assign_rhs1 (stmt);
2941 rhs[1] = gimple_assign_rhs2 (stmt);
2942 rhs[2] = rhs[0];
2943 rhs[3] = rhs[1];
2944 for (i = 0; i < 2; i++)
2945 if (TREE_CODE (rhs[i]) == SSA_NAME)
2947 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2948 if (rhs[2 + i] == NULL_TREE)
2950 if (has_single_use (rhs[i]))
2951 rhs[2 + i] = ops[(*pidx)++]->op;
2952 else
2953 rhs[2 + i] = rhs[i];
2956 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2957 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2959 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2960 var = make_ssa_name (TREE_TYPE (var));
2961 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2962 rhs[2], rhs[3]);
2963 gimple_set_uid (g, gimple_uid (stmt));
2964 gimple_set_visited (g, true);
2965 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2967 return var;
2970 /* Structure to track the initial value passed to get_ops and
2971 the range in the ops vector for each basic block. */
2973 struct inter_bb_range_test_entry
2975 tree op;
2976 unsigned int first_idx, last_idx;
2979 /* Inter-bb range test optimization.
2981 Returns TRUE if a gimple conditional is optimized to a true/false,
2982 otherwise return FALSE.
2984 This indicates to the caller that it should run a CFG cleanup pass
2985 once reassociation is completed. */
2987 static bool
2988 maybe_optimize_range_tests (gimple *stmt)
2990 basic_block first_bb = gimple_bb (stmt);
2991 basic_block last_bb = first_bb;
2992 basic_block other_bb = NULL;
2993 basic_block bb;
2994 edge_iterator ei;
2995 edge e;
2996 auto_vec<operand_entry *> ops;
2997 auto_vec<inter_bb_range_test_entry> bbinfo;
2998 bool any_changes = false;
2999 bool cfg_cleanup_needed = false;
3001 /* Consider only basic blocks that end with GIMPLE_COND or
3002 a cast statement satisfying final_range_test_p. All
3003 but the last bb in the first_bb .. last_bb range
3004 should end with GIMPLE_COND. */
3005 if (gimple_code (stmt) == GIMPLE_COND)
3007 if (EDGE_COUNT (first_bb->succs) != 2)
3008 return cfg_cleanup_needed;
3010 else if (final_range_test_p (stmt))
3011 other_bb = single_succ (first_bb);
3012 else
3013 return cfg_cleanup_needed;
3015 if (stmt_could_throw_p (stmt))
3016 return cfg_cleanup_needed;
3018 /* As relative ordering of post-dominator sons isn't fixed,
3019 maybe_optimize_range_tests can be called first on any
3020 bb in the range we want to optimize. So, start searching
3021 backwards, if first_bb can be set to a predecessor. */
3022 while (single_pred_p (first_bb))
3024 basic_block pred_bb = single_pred (first_bb);
3025 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3026 break;
3027 if (!no_side_effect_bb (first_bb))
3028 break;
3029 first_bb = pred_bb;
3031 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3032 Before starting forward search in last_bb successors, find
3033 out the other_bb. */
3034 if (first_bb == last_bb)
3036 other_bb = NULL;
3037 /* As non-GIMPLE_COND last stmt always terminates the range,
3038 if forward search didn't discover anything, just give up. */
3039 if (gimple_code (stmt) != GIMPLE_COND)
3040 return cfg_cleanup_needed;
3041 /* Look at both successors. Either it ends with a GIMPLE_COND
3042 and satisfies suitable_cond_bb, or ends with a cast and
3043 other_bb is that cast's successor. */
3044 FOR_EACH_EDGE (e, ei, first_bb->succs)
3045 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3046 || e->dest == first_bb)
3047 return cfg_cleanup_needed;
3048 else if (single_pred_p (e->dest))
3050 stmt = last_stmt (e->dest);
3051 if (stmt
3052 && gimple_code (stmt) == GIMPLE_COND
3053 && EDGE_COUNT (e->dest->succs) == 2)
3055 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3056 break;
3057 else
3058 other_bb = NULL;
3060 else if (stmt
3061 && final_range_test_p (stmt)
3062 && find_edge (first_bb, single_succ (e->dest)))
3064 other_bb = single_succ (e->dest);
3065 if (other_bb == first_bb)
3066 other_bb = NULL;
3069 if (other_bb == NULL)
3070 return cfg_cleanup_needed;
3072 /* Now do the forward search, moving last_bb to successor bbs
3073 that aren't other_bb. */
3074 while (EDGE_COUNT (last_bb->succs) == 2)
3076 FOR_EACH_EDGE (e, ei, last_bb->succs)
3077 if (e->dest != other_bb)
3078 break;
3079 if (e == NULL)
3080 break;
3081 if (!single_pred_p (e->dest))
3082 break;
3083 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3084 break;
3085 if (!no_side_effect_bb (e->dest))
3086 break;
3087 last_bb = e->dest;
3089 if (first_bb == last_bb)
3090 return cfg_cleanup_needed;
3091 /* Here basic blocks first_bb through last_bb's predecessor
3092 end with GIMPLE_COND, all of them have one of the edges to
3093 other_bb and another to another block in the range,
3094 all blocks except first_bb don't have side-effects and
3095 last_bb ends with either GIMPLE_COND, or cast satisfying
3096 final_range_test_p. */
3097 for (bb = last_bb; ; bb = single_pred (bb))
3099 enum tree_code code;
3100 tree lhs, rhs;
3101 inter_bb_range_test_entry bb_ent;
3103 bb_ent.op = NULL_TREE;
3104 bb_ent.first_idx = ops.length ();
3105 bb_ent.last_idx = bb_ent.first_idx;
3106 e = find_edge (bb, other_bb);
3107 stmt = last_stmt (bb);
3108 gimple_set_visited (stmt, true);
3109 if (gimple_code (stmt) != GIMPLE_COND)
3111 use_operand_p use_p;
3112 gimple *phi;
3113 edge e2;
3114 unsigned int d;
3116 lhs = gimple_assign_lhs (stmt);
3117 rhs = gimple_assign_rhs1 (stmt);
3118 gcc_assert (bb == last_bb);
3120 /* stmt is
3121 _123 = (int) _234;
3123 followed by:
3124 <bb M>:
3125 # _345 = PHI <_123(N), 1(...), 1(...)>
3127 or 0 instead of 1. If it is 0, the _234
3128 range test is anded together with all the
3129 other range tests, if it is 1, it is ored with
3130 them. */
3131 single_imm_use (lhs, &use_p, &phi);
3132 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3133 e2 = find_edge (first_bb, other_bb);
3134 d = e2->dest_idx;
3135 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3136 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3137 code = BIT_AND_EXPR;
3138 else
3140 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3141 code = BIT_IOR_EXPR;
3144 /* If _234 SSA_NAME_DEF_STMT is
3145 _234 = _567 | _789;
3146 (or &, corresponding to 1/0 in the phi arguments,
3147 push into ops the individual range test arguments
3148 of the bitwise or resp. and, recursively. */
3149 if (!get_ops (rhs, code, &ops,
3150 loop_containing_stmt (stmt))
3151 && has_single_use (rhs))
3153 /* Otherwise, push the _234 range test itself. */
3154 operand_entry *oe = operand_entry_pool.allocate ();
3156 oe->op = rhs;
3157 oe->rank = code;
3158 oe->id = 0;
3159 oe->count = 1;
3160 ops.safe_push (oe);
3161 bb_ent.last_idx++;
3163 else
3164 bb_ent.last_idx = ops.length ();
3165 bb_ent.op = rhs;
3166 bbinfo.safe_push (bb_ent);
3167 continue;
3169 /* Otherwise stmt is GIMPLE_COND. */
3170 code = gimple_cond_code (stmt);
3171 lhs = gimple_cond_lhs (stmt);
3172 rhs = gimple_cond_rhs (stmt);
3173 if (TREE_CODE (lhs) == SSA_NAME
3174 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3175 && ((code != EQ_EXPR && code != NE_EXPR)
3176 || rhs != boolean_false_node
3177 /* Either push into ops the individual bitwise
3178 or resp. and operands, depending on which
3179 edge is other_bb. */
3180 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3181 ^ (code == EQ_EXPR))
3182 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3183 loop_containing_stmt (stmt))))
3185 /* Or push the GIMPLE_COND stmt itself. */
3186 operand_entry *oe = operand_entry_pool.allocate ();
3188 oe->op = NULL;
3189 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3190 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3191 /* oe->op = NULL signs that there is no SSA_NAME
3192 for the range test, and oe->id instead is the
3193 basic block number, at which's end the GIMPLE_COND
3194 is. */
3195 oe->id = bb->index;
3196 oe->count = 1;
3197 ops.safe_push (oe);
3198 bb_ent.op = NULL;
3199 bb_ent.last_idx++;
3201 else if (ops.length () > bb_ent.first_idx)
3203 bb_ent.op = lhs;
3204 bb_ent.last_idx = ops.length ();
3206 bbinfo.safe_push (bb_ent);
3207 if (bb == first_bb)
3208 break;
3210 if (ops.length () > 1)
3211 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3212 if (any_changes)
3214 unsigned int idx, max_idx = 0;
3215 /* update_ops relies on has_single_use predicates returning the
3216 same values as it did during get_ops earlier. Additionally it
3217 never removes statements, only adds new ones and it should walk
3218 from the single imm use and check the predicate already before
3219 making those changes.
3220 On the other side, the handling of GIMPLE_COND directly can turn
3221 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3222 it needs to be done in a separate loop afterwards. */
3223 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3225 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3226 && bbinfo[idx].op != NULL_TREE)
3228 tree new_op;
3230 max_idx = idx;
3231 stmt = last_stmt (bb);
3232 new_op = update_ops (bbinfo[idx].op,
3233 (enum tree_code)
3234 ops[bbinfo[idx].first_idx]->rank,
3235 ops, &bbinfo[idx].first_idx,
3236 loop_containing_stmt (stmt));
3237 if (new_op == NULL_TREE)
3239 gcc_assert (bb == last_bb);
3240 new_op = ops[bbinfo[idx].first_idx++]->op;
3242 if (bbinfo[idx].op != new_op)
3244 imm_use_iterator iter;
3245 use_operand_p use_p;
3246 gimple *use_stmt, *cast_stmt = NULL;
3248 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3249 if (is_gimple_debug (use_stmt))
3250 continue;
3251 else if (gimple_code (use_stmt) == GIMPLE_COND
3252 || gimple_code (use_stmt) == GIMPLE_PHI)
3253 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3254 SET_USE (use_p, new_op);
3255 else if (gimple_assign_cast_p (use_stmt))
3256 cast_stmt = use_stmt;
3257 else
3258 gcc_unreachable ();
3259 if (cast_stmt)
3261 gcc_assert (bb == last_bb);
3262 tree lhs = gimple_assign_lhs (cast_stmt);
3263 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3264 enum tree_code rhs_code
3265 = gimple_assign_rhs_code (cast_stmt);
3266 gassign *g;
3267 if (is_gimple_min_invariant (new_op))
3269 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3270 g = gimple_build_assign (new_lhs, new_op);
3272 else
3273 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3274 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3275 gimple_set_uid (g, gimple_uid (cast_stmt));
3276 gimple_set_visited (g, true);
3277 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3278 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3279 if (is_gimple_debug (use_stmt))
3280 continue;
3281 else if (gimple_code (use_stmt) == GIMPLE_COND
3282 || gimple_code (use_stmt) == GIMPLE_PHI)
3283 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3284 SET_USE (use_p, new_lhs);
3285 else
3286 gcc_unreachable ();
3290 if (bb == first_bb)
3291 break;
3293 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3295 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3296 && bbinfo[idx].op == NULL_TREE
3297 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3299 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3301 if (idx > max_idx)
3302 max_idx = idx;
3304 /* If we collapse the conditional to a true/false
3305 condition, then bubble that knowledge up to our caller. */
3306 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3308 gimple_cond_make_false (cond_stmt);
3309 cfg_cleanup_needed = true;
3311 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3313 gimple_cond_make_true (cond_stmt);
3314 cfg_cleanup_needed = true;
3316 else
3318 gimple_cond_set_code (cond_stmt, NE_EXPR);
3319 gimple_cond_set_lhs (cond_stmt,
3320 ops[bbinfo[idx].first_idx]->op);
3321 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3323 update_stmt (cond_stmt);
3325 if (bb == first_bb)
3326 break;
3329 /* The above changes could result in basic blocks after the first
3330 modified one, up to and including last_bb, to be executed even if
3331 they would not be in the original program. If the value ranges of
3332 assignment lhs' in those bbs were dependent on the conditions
3333 guarding those basic blocks which now can change, the VRs might
3334 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3335 are only used within the same bb, it should be not a big deal if
3336 we just reset all the VRs in those bbs. See PR68671. */
3337 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3338 reset_flow_sensitive_info_in_bb (bb);
3340 return cfg_cleanup_needed;
3343 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3344 of STMT in it's operands. This is also known as a "destructive
3345 update" operation. */
3347 static bool
3348 is_phi_for_stmt (gimple *stmt, tree operand)
3350 gimple *def_stmt;
3351 gphi *def_phi;
3352 tree lhs;
3353 use_operand_p arg_p;
3354 ssa_op_iter i;
3356 if (TREE_CODE (operand) != SSA_NAME)
3357 return false;
3359 lhs = gimple_assign_lhs (stmt);
3361 def_stmt = SSA_NAME_DEF_STMT (operand);
3362 def_phi = dyn_cast <gphi *> (def_stmt);
3363 if (!def_phi)
3364 return false;
3366 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3367 if (lhs == USE_FROM_PTR (arg_p))
3368 return true;
3369 return false;
3372 /* Remove def stmt of VAR if VAR has zero uses and recurse
3373 on rhs1 operand if so. */
3375 static void
3376 remove_visited_stmt_chain (tree var)
3378 gimple *stmt;
3379 gimple_stmt_iterator gsi;
3381 while (1)
3383 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3384 return;
3385 stmt = SSA_NAME_DEF_STMT (var);
3386 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3388 var = gimple_assign_rhs1 (stmt);
3389 gsi = gsi_for_stmt (stmt);
3390 reassoc_remove_stmt (&gsi);
3391 release_defs (stmt);
3393 else
3394 return;
3398 /* This function checks three consequtive operands in
3399 passed operands vector OPS starting from OPINDEX and
3400 swaps two operands if it is profitable for binary operation
3401 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3403 We pair ops with the same rank if possible.
3405 The alternative we try is to see if STMT is a destructive
3406 update style statement, which is like:
3407 b = phi (a, ...)
3408 a = c + b;
3409 In that case, we want to use the destructive update form to
3410 expose the possible vectorizer sum reduction opportunity.
3411 In that case, the third operand will be the phi node. This
3412 check is not performed if STMT is null.
3414 We could, of course, try to be better as noted above, and do a
3415 lot of work to try to find these opportunities in >3 operand
3416 cases, but it is unlikely to be worth it. */
3418 static void
3419 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3420 unsigned int opindex, gimple *stmt)
3422 operand_entry *oe1, *oe2, *oe3;
3424 oe1 = ops[opindex];
3425 oe2 = ops[opindex + 1];
3426 oe3 = ops[opindex + 2];
3428 if ((oe1->rank == oe2->rank
3429 && oe2->rank != oe3->rank)
3430 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3431 && !is_phi_for_stmt (stmt, oe1->op)
3432 && !is_phi_for_stmt (stmt, oe2->op)))
3434 operand_entry temp = *oe3;
3435 oe3->op = oe1->op;
3436 oe3->rank = oe1->rank;
3437 oe1->op = temp.op;
3438 oe1->rank= temp.rank;
3440 else if ((oe1->rank == oe3->rank
3441 && oe2->rank != oe3->rank)
3442 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3443 && !is_phi_for_stmt (stmt, oe1->op)
3444 && !is_phi_for_stmt (stmt, oe3->op)))
3446 operand_entry temp = *oe2;
3447 oe2->op = oe1->op;
3448 oe2->rank = oe1->rank;
3449 oe1->op = temp.op;
3450 oe1->rank = temp.rank;
3454 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3455 two definitions, otherwise return STMT. */
3457 static inline gimple *
3458 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3460 if (TREE_CODE (rhs1) == SSA_NAME
3461 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3462 stmt = SSA_NAME_DEF_STMT (rhs1);
3463 if (TREE_CODE (rhs2) == SSA_NAME
3464 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3465 stmt = SSA_NAME_DEF_STMT (rhs2);
3466 return stmt;
3469 /* Recursively rewrite our linearized statements so that the operators
3470 match those in OPS[OPINDEX], putting the computation in rank
3471 order. Return new lhs. */
3473 static tree
3474 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3475 vec<operand_entry *> ops, bool changed)
3477 tree rhs1 = gimple_assign_rhs1 (stmt);
3478 tree rhs2 = gimple_assign_rhs2 (stmt);
3479 tree lhs = gimple_assign_lhs (stmt);
3480 operand_entry *oe;
3482 /* The final recursion case for this function is that you have
3483 exactly two operations left.
3484 If we had exactly one op in the entire list to start with, we
3485 would have never called this function, and the tail recursion
3486 rewrites them one at a time. */
3487 if (opindex + 2 == ops.length ())
3489 operand_entry *oe1, *oe2;
3491 oe1 = ops[opindex];
3492 oe2 = ops[opindex + 1];
3494 if (rhs1 != oe1->op || rhs2 != oe2->op)
3496 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3497 unsigned int uid = gimple_uid (stmt);
3499 if (dump_file && (dump_flags & TDF_DETAILS))
3501 fprintf (dump_file, "Transforming ");
3502 print_gimple_stmt (dump_file, stmt, 0, 0);
3505 /* Even when changed is false, reassociation could have e.g. removed
3506 some redundant operations, so unless we are just swapping the
3507 arguments or unless there is no change at all (then we just
3508 return lhs), force creation of a new SSA_NAME. */
3509 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3511 gimple *insert_point
3512 = find_insert_point (stmt, oe1->op, oe2->op);
3513 lhs = make_ssa_name (TREE_TYPE (lhs));
3514 stmt
3515 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3516 oe1->op, oe2->op);
3517 gimple_set_uid (stmt, uid);
3518 gimple_set_visited (stmt, true);
3519 if (insert_point == gsi_stmt (gsi))
3520 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3521 else
3522 insert_stmt_after (stmt, insert_point);
3524 else
3526 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3527 == stmt);
3528 gimple_assign_set_rhs1 (stmt, oe1->op);
3529 gimple_assign_set_rhs2 (stmt, oe2->op);
3530 update_stmt (stmt);
3533 if (rhs1 != oe1->op && rhs1 != oe2->op)
3534 remove_visited_stmt_chain (rhs1);
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3538 fprintf (dump_file, " into ");
3539 print_gimple_stmt (dump_file, stmt, 0, 0);
3542 return lhs;
3545 /* If we hit here, we should have 3 or more ops left. */
3546 gcc_assert (opindex + 2 < ops.length ());
3548 /* Rewrite the next operator. */
3549 oe = ops[opindex];
3551 /* Recurse on the LHS of the binary operator, which is guaranteed to
3552 be the non-leaf side. */
3553 tree new_rhs1
3554 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3555 changed || oe->op != rhs2);
3557 if (oe->op != rhs2 || new_rhs1 != rhs1)
3559 if (dump_file && (dump_flags & TDF_DETAILS))
3561 fprintf (dump_file, "Transforming ");
3562 print_gimple_stmt (dump_file, stmt, 0, 0);
3565 /* If changed is false, this is either opindex == 0
3566 or all outer rhs2's were equal to corresponding oe->op,
3567 and powi_result is NULL.
3568 That means lhs is equivalent before and after reassociation.
3569 Otherwise ensure the old lhs SSA_NAME is not reused and
3570 create a new stmt as well, so that any debug stmts will be
3571 properly adjusted. */
3572 if (changed)
3574 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3575 unsigned int uid = gimple_uid (stmt);
3576 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3578 lhs = make_ssa_name (TREE_TYPE (lhs));
3579 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3580 new_rhs1, oe->op);
3581 gimple_set_uid (stmt, uid);
3582 gimple_set_visited (stmt, true);
3583 if (insert_point == gsi_stmt (gsi))
3584 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3585 else
3586 insert_stmt_after (stmt, insert_point);
3588 else
3590 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3591 == stmt);
3592 gimple_assign_set_rhs1 (stmt, new_rhs1);
3593 gimple_assign_set_rhs2 (stmt, oe->op);
3594 update_stmt (stmt);
3597 if (dump_file && (dump_flags & TDF_DETAILS))
3599 fprintf (dump_file, " into ");
3600 print_gimple_stmt (dump_file, stmt, 0, 0);
3603 return lhs;
3606 /* Find out how many cycles we need to compute statements chain.
3607 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3608 maximum number of independent statements we may execute per cycle. */
3610 static int
3611 get_required_cycles (int ops_num, int cpu_width)
3613 int res;
3614 int elog;
3615 unsigned int rest;
3617 /* While we have more than 2 * cpu_width operands
3618 we may reduce number of operands by cpu_width
3619 per cycle. */
3620 res = ops_num / (2 * cpu_width);
3622 /* Remained operands count may be reduced twice per cycle
3623 until we have only one operand. */
3624 rest = (unsigned)(ops_num - res * cpu_width);
3625 elog = exact_log2 (rest);
3626 if (elog >= 0)
3627 res += elog;
3628 else
3629 res += floor_log2 (rest) + 1;
3631 return res;
3634 /* Returns an optimal number of registers to use for computation of
3635 given statements. */
3637 static int
3638 get_reassociation_width (int ops_num, enum tree_code opc,
3639 machine_mode mode)
3641 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3642 int width;
3643 int width_min;
3644 int cycles_best;
3646 if (param_width > 0)
3647 width = param_width;
3648 else
3649 width = targetm.sched.reassociation_width (opc, mode);
3651 if (width == 1)
3652 return width;
3654 /* Get the minimal time required for sequence computation. */
3655 cycles_best = get_required_cycles (ops_num, width);
3657 /* Check if we may use less width and still compute sequence for
3658 the same time. It will allow us to reduce registers usage.
3659 get_required_cycles is monotonically increasing with lower width
3660 so we can perform a binary search for the minimal width that still
3661 results in the optimal cycle count. */
3662 width_min = 1;
3663 while (width > width_min)
3665 int width_mid = (width + width_min) / 2;
3667 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3668 width = width_mid;
3669 else if (width_min < width_mid)
3670 width_min = width_mid;
3671 else
3672 break;
3675 return width;
3678 /* Recursively rewrite our linearized statements so that the operators
3679 match those in OPS[OPINDEX], putting the computation in rank
3680 order and trying to allow operations to be executed in
3681 parallel. */
3683 static void
3684 rewrite_expr_tree_parallel (gassign *stmt, int width,
3685 vec<operand_entry *> ops)
3687 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3688 int op_num = ops.length ();
3689 int stmt_num = op_num - 1;
3690 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3691 int op_index = op_num - 1;
3692 int stmt_index = 0;
3693 int ready_stmts_end = 0;
3694 int i = 0;
3695 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3697 /* We start expression rewriting from the top statements.
3698 So, in this loop we create a full list of statements
3699 we will work with. */
3700 stmts[stmt_num - 1] = stmt;
3701 for (i = stmt_num - 2; i >= 0; i--)
3702 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3704 for (i = 0; i < stmt_num; i++)
3706 tree op1, op2;
3708 /* Determine whether we should use results of
3709 already handled statements or not. */
3710 if (ready_stmts_end == 0
3711 && (i - stmt_index >= width || op_index < 1))
3712 ready_stmts_end = i;
3714 /* Now we choose operands for the next statement. Non zero
3715 value in ready_stmts_end means here that we should use
3716 the result of already generated statements as new operand. */
3717 if (ready_stmts_end > 0)
3719 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3720 if (ready_stmts_end > stmt_index)
3721 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3722 else if (op_index >= 0)
3723 op2 = ops[op_index--]->op;
3724 else
3726 gcc_assert (stmt_index < i);
3727 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3730 if (stmt_index >= ready_stmts_end)
3731 ready_stmts_end = 0;
3733 else
3735 if (op_index > 1)
3736 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3737 op2 = ops[op_index--]->op;
3738 op1 = ops[op_index--]->op;
3741 /* If we emit the last statement then we should put
3742 operands into the last statement. It will also
3743 break the loop. */
3744 if (op_index < 0 && stmt_index == i)
3745 i = stmt_num - 1;
3747 if (dump_file && (dump_flags & TDF_DETAILS))
3749 fprintf (dump_file, "Transforming ");
3750 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3753 /* We keep original statement only for the last one. All
3754 others are recreated. */
3755 if (i == stmt_num - 1)
3757 gimple_assign_set_rhs1 (stmts[i], op1);
3758 gimple_assign_set_rhs2 (stmts[i], op2);
3759 update_stmt (stmts[i]);
3761 else
3762 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3766 fprintf (dump_file, " into ");
3767 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3771 remove_visited_stmt_chain (last_rhs1);
3774 /* Transform STMT, which is really (A +B) + (C + D) into the left
3775 linear form, ((A+B)+C)+D.
3776 Recurse on D if necessary. */
3778 static void
3779 linearize_expr (gimple *stmt)
3781 gimple_stmt_iterator gsi;
3782 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3783 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3784 gimple *oldbinrhs = binrhs;
3785 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3786 gimple *newbinrhs = NULL;
3787 struct loop *loop = loop_containing_stmt (stmt);
3788 tree lhs = gimple_assign_lhs (stmt);
3790 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3791 && is_reassociable_op (binrhs, rhscode, loop));
3793 gsi = gsi_for_stmt (stmt);
3795 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3796 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3797 gimple_assign_rhs_code (binrhs),
3798 gimple_assign_lhs (binlhs),
3799 gimple_assign_rhs2 (binrhs));
3800 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3801 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3802 gimple_set_uid (binrhs, gimple_uid (stmt));
3804 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3805 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3809 fprintf (dump_file, "Linearized: ");
3810 print_gimple_stmt (dump_file, stmt, 0, 0);
3813 reassociate_stats.linearized++;
3814 update_stmt (stmt);
3816 gsi = gsi_for_stmt (oldbinrhs);
3817 reassoc_remove_stmt (&gsi);
3818 release_defs (oldbinrhs);
3820 gimple_set_visited (stmt, true);
3821 gimple_set_visited (binlhs, true);
3822 gimple_set_visited (binrhs, true);
3824 /* Tail recurse on the new rhs if it still needs reassociation. */
3825 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3826 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3827 want to change the algorithm while converting to tuples. */
3828 linearize_expr (stmt);
3831 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3832 it. Otherwise, return NULL. */
3834 static gimple *
3835 get_single_immediate_use (tree lhs)
3837 use_operand_p immuse;
3838 gimple *immusestmt;
3840 if (TREE_CODE (lhs) == SSA_NAME
3841 && single_imm_use (lhs, &immuse, &immusestmt)
3842 && is_gimple_assign (immusestmt))
3843 return immusestmt;
3845 return NULL;
3848 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3849 representing the negated value. Insertions of any necessary
3850 instructions go before GSI.
3851 This function is recursive in that, if you hand it "a_5" as the
3852 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3853 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3855 static tree
3856 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3858 gimple *negatedefstmt = NULL;
3859 tree resultofnegate;
3860 gimple_stmt_iterator gsi;
3861 unsigned int uid;
3863 /* If we are trying to negate a name, defined by an add, negate the
3864 add operands instead. */
3865 if (TREE_CODE (tonegate) == SSA_NAME)
3866 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3867 if (TREE_CODE (tonegate) == SSA_NAME
3868 && is_gimple_assign (negatedefstmt)
3869 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3870 && has_single_use (gimple_assign_lhs (negatedefstmt))
3871 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3873 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3874 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3875 tree lhs = gimple_assign_lhs (negatedefstmt);
3876 gimple *g;
3878 gsi = gsi_for_stmt (negatedefstmt);
3879 rhs1 = negate_value (rhs1, &gsi);
3881 gsi = gsi_for_stmt (negatedefstmt);
3882 rhs2 = negate_value (rhs2, &gsi);
3884 gsi = gsi_for_stmt (negatedefstmt);
3885 lhs = make_ssa_name (TREE_TYPE (lhs));
3886 gimple_set_visited (negatedefstmt, true);
3887 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3888 gimple_set_uid (g, gimple_uid (negatedefstmt));
3889 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3890 return lhs;
3893 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3894 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3895 NULL_TREE, true, GSI_SAME_STMT);
3896 gsi = *gsip;
3897 uid = gimple_uid (gsi_stmt (gsi));
3898 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3900 gimple *stmt = gsi_stmt (gsi);
3901 if (gimple_uid (stmt) != 0)
3902 break;
3903 gimple_set_uid (stmt, uid);
3905 return resultofnegate;
3908 /* Return true if we should break up the subtract in STMT into an add
3909 with negate. This is true when we the subtract operands are really
3910 adds, or the subtract itself is used in an add expression. In
3911 either case, breaking up the subtract into an add with negate
3912 exposes the adds to reassociation. */
3914 static bool
3915 should_break_up_subtract (gimple *stmt)
3917 tree lhs = gimple_assign_lhs (stmt);
3918 tree binlhs = gimple_assign_rhs1 (stmt);
3919 tree binrhs = gimple_assign_rhs2 (stmt);
3920 gimple *immusestmt;
3921 struct loop *loop = loop_containing_stmt (stmt);
3923 if (TREE_CODE (binlhs) == SSA_NAME
3924 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3925 return true;
3927 if (TREE_CODE (binrhs) == SSA_NAME
3928 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3929 return true;
3931 if (TREE_CODE (lhs) == SSA_NAME
3932 && (immusestmt = get_single_immediate_use (lhs))
3933 && is_gimple_assign (immusestmt)
3934 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3935 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3936 return true;
3937 return false;
3940 /* Transform STMT from A - B into A + -B. */
3942 static void
3943 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
3945 tree rhs1 = gimple_assign_rhs1 (stmt);
3946 tree rhs2 = gimple_assign_rhs2 (stmt);
3948 if (dump_file && (dump_flags & TDF_DETAILS))
3950 fprintf (dump_file, "Breaking up subtract ");
3951 print_gimple_stmt (dump_file, stmt, 0, 0);
3954 rhs2 = negate_value (rhs2, gsip);
3955 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3956 update_stmt (stmt);
3959 /* Determine whether STMT is a builtin call that raises an SSA name
3960 to an integer power and has only one use. If so, and this is early
3961 reassociation and unsafe math optimizations are permitted, place
3962 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3963 If any of these conditions does not hold, return FALSE. */
3965 static bool
3966 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
3968 tree arg1;
3969 REAL_VALUE_TYPE c, cint;
3971 if (!reassoc_insert_powi_p
3972 || !flag_unsafe_math_optimizations
3973 || !is_gimple_call (stmt)
3974 || !has_single_use (gimple_call_lhs (stmt)))
3975 return false;
3977 switch (gimple_call_combined_fn (stmt))
3979 CASE_CFN_POW:
3980 if (flag_errno_math)
3981 return false;
3983 *base = gimple_call_arg (stmt, 0);
3984 arg1 = gimple_call_arg (stmt, 1);
3986 if (TREE_CODE (arg1) != REAL_CST)
3987 return false;
3989 c = TREE_REAL_CST (arg1);
3991 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3992 return false;
3994 *exponent = real_to_integer (&c);
3995 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3996 if (!real_identical (&c, &cint))
3997 return false;
3999 break;
4001 CASE_CFN_POWI:
4002 *base = gimple_call_arg (stmt, 0);
4003 arg1 = gimple_call_arg (stmt, 1);
4005 if (!tree_fits_shwi_p (arg1))
4006 return false;
4008 *exponent = tree_to_shwi (arg1);
4009 break;
4011 default:
4012 return false;
4015 /* Expanding negative exponents is generally unproductive, so we don't
4016 complicate matters with those. Exponents of zero and one should
4017 have been handled by expression folding. */
4018 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4019 return false;
4021 return true;
4024 /* Recursively linearize a binary expression that is the RHS of STMT.
4025 Place the operands of the expression tree in the vector named OPS. */
4027 static void
4028 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4029 bool is_associative, bool set_visited)
4031 tree binlhs = gimple_assign_rhs1 (stmt);
4032 tree binrhs = gimple_assign_rhs2 (stmt);
4033 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4034 bool binlhsisreassoc = false;
4035 bool binrhsisreassoc = false;
4036 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4037 struct loop *loop = loop_containing_stmt (stmt);
4038 tree base = NULL_TREE;
4039 HOST_WIDE_INT exponent = 0;
4041 if (set_visited)
4042 gimple_set_visited (stmt, true);
4044 if (TREE_CODE (binlhs) == SSA_NAME)
4046 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4047 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4048 && !stmt_could_throw_p (binlhsdef));
4051 if (TREE_CODE (binrhs) == SSA_NAME)
4053 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4054 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4055 && !stmt_could_throw_p (binrhsdef));
4058 /* If the LHS is not reassociable, but the RHS is, we need to swap
4059 them. If neither is reassociable, there is nothing we can do, so
4060 just put them in the ops vector. If the LHS is reassociable,
4061 linearize it. If both are reassociable, then linearize the RHS
4062 and the LHS. */
4064 if (!binlhsisreassoc)
4066 /* If this is not a associative operation like division, give up. */
4067 if (!is_associative)
4069 add_to_ops_vec (ops, binrhs);
4070 return;
4073 if (!binrhsisreassoc)
4075 if (rhscode == MULT_EXPR
4076 && TREE_CODE (binrhs) == SSA_NAME
4077 && acceptable_pow_call (binrhsdef, &base, &exponent))
4079 add_repeat_to_ops_vec (ops, base, exponent);
4080 gimple_set_visited (binrhsdef, true);
4082 else
4083 add_to_ops_vec (ops, binrhs);
4085 if (rhscode == MULT_EXPR
4086 && TREE_CODE (binlhs) == SSA_NAME
4087 && acceptable_pow_call (binlhsdef, &base, &exponent))
4089 add_repeat_to_ops_vec (ops, base, exponent);
4090 gimple_set_visited (binlhsdef, true);
4092 else
4093 add_to_ops_vec (ops, binlhs);
4095 return;
4098 if (dump_file && (dump_flags & TDF_DETAILS))
4100 fprintf (dump_file, "swapping operands of ");
4101 print_gimple_stmt (dump_file, stmt, 0, 0);
4104 swap_ssa_operands (stmt,
4105 gimple_assign_rhs1_ptr (stmt),
4106 gimple_assign_rhs2_ptr (stmt));
4107 update_stmt (stmt);
4109 if (dump_file && (dump_flags & TDF_DETAILS))
4111 fprintf (dump_file, " is now ");
4112 print_gimple_stmt (dump_file, stmt, 0, 0);
4115 /* We want to make it so the lhs is always the reassociative op,
4116 so swap. */
4117 std::swap (binlhs, binrhs);
4119 else if (binrhsisreassoc)
4121 linearize_expr (stmt);
4122 binlhs = gimple_assign_rhs1 (stmt);
4123 binrhs = gimple_assign_rhs2 (stmt);
4126 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4127 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4128 rhscode, loop));
4129 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4130 is_associative, set_visited);
4132 if (rhscode == MULT_EXPR
4133 && TREE_CODE (binrhs) == SSA_NAME
4134 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4136 add_repeat_to_ops_vec (ops, base, exponent);
4137 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4139 else
4140 add_to_ops_vec (ops, binrhs);
4143 /* Repropagate the negates back into subtracts, since no other pass
4144 currently does it. */
4146 static void
4147 repropagate_negates (void)
4149 unsigned int i = 0;
4150 tree negate;
4152 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4154 gimple *user = get_single_immediate_use (negate);
4156 if (!user || !is_gimple_assign (user))
4157 continue;
4159 /* The negate operand can be either operand of a PLUS_EXPR
4160 (it can be the LHS if the RHS is a constant for example).
4162 Force the negate operand to the RHS of the PLUS_EXPR, then
4163 transform the PLUS_EXPR into a MINUS_EXPR. */
4164 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4166 /* If the negated operand appears on the LHS of the
4167 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4168 to force the negated operand to the RHS of the PLUS_EXPR. */
4169 if (gimple_assign_rhs1 (user) == negate)
4171 swap_ssa_operands (user,
4172 gimple_assign_rhs1_ptr (user),
4173 gimple_assign_rhs2_ptr (user));
4176 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4177 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4178 if (gimple_assign_rhs2 (user) == negate)
4180 tree rhs1 = gimple_assign_rhs1 (user);
4181 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4182 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4183 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4184 update_stmt (user);
4187 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4189 if (gimple_assign_rhs1 (user) == negate)
4191 /* We have
4192 x = -a
4193 y = x - b
4194 which we transform into
4195 x = a + b
4196 y = -x .
4197 This pushes down the negate which we possibly can merge
4198 into some other operation, hence insert it into the
4199 plus_negates vector. */
4200 gimple *feed = SSA_NAME_DEF_STMT (negate);
4201 tree a = gimple_assign_rhs1 (feed);
4202 tree b = gimple_assign_rhs2 (user);
4203 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4204 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4205 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4206 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4207 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4208 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4209 user = gsi_stmt (gsi2);
4210 update_stmt (user);
4211 reassoc_remove_stmt (&gsi);
4212 release_defs (feed);
4213 plus_negates.safe_push (gimple_assign_lhs (user));
4215 else
4217 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4218 rid of one operation. */
4219 gimple *feed = SSA_NAME_DEF_STMT (negate);
4220 tree a = gimple_assign_rhs1 (feed);
4221 tree rhs1 = gimple_assign_rhs1 (user);
4222 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4223 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4224 update_stmt (gsi_stmt (gsi));
4230 /* Returns true if OP is of a type for which we can do reassociation.
4231 That is for integral or non-saturating fixed-point types, and for
4232 floating point type when associative-math is enabled. */
4234 static bool
4235 can_reassociate_p (tree op)
4237 tree type = TREE_TYPE (op);
4238 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4239 || NON_SAT_FIXED_POINT_TYPE_P (type)
4240 || (flag_associative_math && FLOAT_TYPE_P (type)))
4241 return true;
4242 return false;
4245 /* Break up subtract operations in block BB.
4247 We do this top down because we don't know whether the subtract is
4248 part of a possible chain of reassociation except at the top.
4250 IE given
4251 d = f + g
4252 c = a + e
4253 b = c - d
4254 q = b - r
4255 k = t - q
4257 we want to break up k = t - q, but we won't until we've transformed q
4258 = b - r, which won't be broken up until we transform b = c - d.
4260 En passant, clear the GIMPLE visited flag on every statement
4261 and set UIDs within each basic block. */
4263 static void
4264 break_up_subtract_bb (basic_block bb)
4266 gimple_stmt_iterator gsi;
4267 basic_block son;
4268 unsigned int uid = 1;
4270 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4272 gimple *stmt = gsi_stmt (gsi);
4273 gimple_set_visited (stmt, false);
4274 gimple_set_uid (stmt, uid++);
4276 if (!is_gimple_assign (stmt)
4277 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4278 continue;
4280 /* Look for simple gimple subtract operations. */
4281 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4283 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4284 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4285 continue;
4287 /* Check for a subtract used only in an addition. If this
4288 is the case, transform it into add of a negate for better
4289 reassociation. IE transform C = A-B into C = A + -B if C
4290 is only used in an addition. */
4291 if (should_break_up_subtract (stmt))
4292 break_up_subtract (stmt, &gsi);
4294 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4295 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4296 plus_negates.safe_push (gimple_assign_lhs (stmt));
4298 for (son = first_dom_son (CDI_DOMINATORS, bb);
4299 son;
4300 son = next_dom_son (CDI_DOMINATORS, son))
4301 break_up_subtract_bb (son);
4304 /* Used for repeated factor analysis. */
4305 struct repeat_factor
4307 /* An SSA name that occurs in a multiply chain. */
4308 tree factor;
4310 /* Cached rank of the factor. */
4311 unsigned rank;
4313 /* Number of occurrences of the factor in the chain. */
4314 HOST_WIDE_INT count;
4316 /* An SSA name representing the product of this factor and
4317 all factors appearing later in the repeated factor vector. */
4318 tree repr;
4322 static vec<repeat_factor> repeat_factor_vec;
4324 /* Used for sorting the repeat factor vector. Sort primarily by
4325 ascending occurrence count, secondarily by descending rank. */
4327 static int
4328 compare_repeat_factors (const void *x1, const void *x2)
4330 const repeat_factor *rf1 = (const repeat_factor *) x1;
4331 const repeat_factor *rf2 = (const repeat_factor *) x2;
4333 if (rf1->count != rf2->count)
4334 return rf1->count - rf2->count;
4336 return rf2->rank - rf1->rank;
4339 /* Look for repeated operands in OPS in the multiply tree rooted at
4340 STMT. Replace them with an optimal sequence of multiplies and powi
4341 builtin calls, and remove the used operands from OPS. Return an
4342 SSA name representing the value of the replacement sequence. */
4344 static tree
4345 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4347 unsigned i, j, vec_len;
4348 int ii;
4349 operand_entry *oe;
4350 repeat_factor *rf1, *rf2;
4351 repeat_factor rfnew;
4352 tree result = NULL_TREE;
4353 tree target_ssa, iter_result;
4354 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4355 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4356 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4357 gimple *mul_stmt, *pow_stmt;
4359 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4360 target. */
4361 if (!powi_fndecl)
4362 return NULL_TREE;
4364 /* Allocate the repeated factor vector. */
4365 repeat_factor_vec.create (10);
4367 /* Scan the OPS vector for all SSA names in the product and build
4368 up a vector of occurrence counts for each factor. */
4369 FOR_EACH_VEC_ELT (*ops, i, oe)
4371 if (TREE_CODE (oe->op) == SSA_NAME)
4373 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4375 if (rf1->factor == oe->op)
4377 rf1->count += oe->count;
4378 break;
4382 if (j >= repeat_factor_vec.length ())
4384 rfnew.factor = oe->op;
4385 rfnew.rank = oe->rank;
4386 rfnew.count = oe->count;
4387 rfnew.repr = NULL_TREE;
4388 repeat_factor_vec.safe_push (rfnew);
4393 /* Sort the repeated factor vector by (a) increasing occurrence count,
4394 and (b) decreasing rank. */
4395 repeat_factor_vec.qsort (compare_repeat_factors);
4397 /* It is generally best to combine as many base factors as possible
4398 into a product before applying __builtin_powi to the result.
4399 However, the sort order chosen for the repeated factor vector
4400 allows us to cache partial results for the product of the base
4401 factors for subsequent use. When we already have a cached partial
4402 result from a previous iteration, it is best to make use of it
4403 before looking for another __builtin_pow opportunity.
4405 As an example, consider x * x * y * y * y * z * z * z * z.
4406 We want to first compose the product x * y * z, raise it to the
4407 second power, then multiply this by y * z, and finally multiply
4408 by z. This can be done in 5 multiplies provided we cache y * z
4409 for use in both expressions:
4411 t1 = y * z
4412 t2 = t1 * x
4413 t3 = t2 * t2
4414 t4 = t1 * t3
4415 result = t4 * z
4417 If we instead ignored the cached y * z and first multiplied by
4418 the __builtin_pow opportunity z * z, we would get the inferior:
4420 t1 = y * z
4421 t2 = t1 * x
4422 t3 = t2 * t2
4423 t4 = z * z
4424 t5 = t3 * t4
4425 result = t5 * y */
4427 vec_len = repeat_factor_vec.length ();
4429 /* Repeatedly look for opportunities to create a builtin_powi call. */
4430 while (true)
4432 HOST_WIDE_INT power;
4434 /* First look for the largest cached product of factors from
4435 preceding iterations. If found, create a builtin_powi for
4436 it if the minimum occurrence count for its factors is at
4437 least 2, or just use this cached product as our next
4438 multiplicand if the minimum occurrence count is 1. */
4439 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4441 if (rf1->repr && rf1->count > 0)
4442 break;
4445 if (j < vec_len)
4447 power = rf1->count;
4449 if (power == 1)
4451 iter_result = rf1->repr;
4453 if (dump_file && (dump_flags & TDF_DETAILS))
4455 unsigned elt;
4456 repeat_factor *rf;
4457 fputs ("Multiplying by cached product ", dump_file);
4458 for (elt = j; elt < vec_len; elt++)
4460 rf = &repeat_factor_vec[elt];
4461 print_generic_expr (dump_file, rf->factor, 0);
4462 if (elt < vec_len - 1)
4463 fputs (" * ", dump_file);
4465 fputs ("\n", dump_file);
4468 else
4470 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4471 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4472 build_int_cst (integer_type_node,
4473 power));
4474 gimple_call_set_lhs (pow_stmt, iter_result);
4475 gimple_set_location (pow_stmt, gimple_location (stmt));
4476 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4477 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4479 if (dump_file && (dump_flags & TDF_DETAILS))
4481 unsigned elt;
4482 repeat_factor *rf;
4483 fputs ("Building __builtin_pow call for cached product (",
4484 dump_file);
4485 for (elt = j; elt < vec_len; elt++)
4487 rf = &repeat_factor_vec[elt];
4488 print_generic_expr (dump_file, rf->factor, 0);
4489 if (elt < vec_len - 1)
4490 fputs (" * ", dump_file);
4492 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4493 power);
4497 else
4499 /* Otherwise, find the first factor in the repeated factor
4500 vector whose occurrence count is at least 2. If no such
4501 factor exists, there are no builtin_powi opportunities
4502 remaining. */
4503 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4505 if (rf1->count >= 2)
4506 break;
4509 if (j >= vec_len)
4510 break;
4512 power = rf1->count;
4514 if (dump_file && (dump_flags & TDF_DETAILS))
4516 unsigned elt;
4517 repeat_factor *rf;
4518 fputs ("Building __builtin_pow call for (", dump_file);
4519 for (elt = j; elt < vec_len; elt++)
4521 rf = &repeat_factor_vec[elt];
4522 print_generic_expr (dump_file, rf->factor, 0);
4523 if (elt < vec_len - 1)
4524 fputs (" * ", dump_file);
4526 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4529 reassociate_stats.pows_created++;
4531 /* Visit each element of the vector in reverse order (so that
4532 high-occurrence elements are visited first, and within the
4533 same occurrence count, lower-ranked elements are visited
4534 first). Form a linear product of all elements in this order
4535 whose occurrencce count is at least that of element J.
4536 Record the SSA name representing the product of each element
4537 with all subsequent elements in the vector. */
4538 if (j == vec_len - 1)
4539 rf1->repr = rf1->factor;
4540 else
4542 for (ii = vec_len - 2; ii >= (int)j; ii--)
4544 tree op1, op2;
4546 rf1 = &repeat_factor_vec[ii];
4547 rf2 = &repeat_factor_vec[ii + 1];
4549 /* Init the last factor's representative to be itself. */
4550 if (!rf2->repr)
4551 rf2->repr = rf2->factor;
4553 op1 = rf1->factor;
4554 op2 = rf2->repr;
4556 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4557 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4558 op1, op2);
4559 gimple_set_location (mul_stmt, gimple_location (stmt));
4560 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4561 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4562 rf1->repr = target_ssa;
4564 /* Don't reprocess the multiply we just introduced. */
4565 gimple_set_visited (mul_stmt, true);
4569 /* Form a call to __builtin_powi for the maximum product
4570 just formed, raised to the power obtained earlier. */
4571 rf1 = &repeat_factor_vec[j];
4572 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4573 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4574 build_int_cst (integer_type_node,
4575 power));
4576 gimple_call_set_lhs (pow_stmt, iter_result);
4577 gimple_set_location (pow_stmt, gimple_location (stmt));
4578 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4579 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4582 /* If we previously formed at least one other builtin_powi call,
4583 form the product of this one and those others. */
4584 if (result)
4586 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4587 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4588 result, iter_result);
4589 gimple_set_location (mul_stmt, gimple_location (stmt));
4590 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4591 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4592 gimple_set_visited (mul_stmt, true);
4593 result = new_result;
4595 else
4596 result = iter_result;
4598 /* Decrement the occurrence count of each element in the product
4599 by the count found above, and remove this many copies of each
4600 factor from OPS. */
4601 for (i = j; i < vec_len; i++)
4603 unsigned k = power;
4604 unsigned n;
4606 rf1 = &repeat_factor_vec[i];
4607 rf1->count -= power;
4609 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4611 if (oe->op == rf1->factor)
4613 if (oe->count <= k)
4615 ops->ordered_remove (n);
4616 k -= oe->count;
4618 if (k == 0)
4619 break;
4621 else
4623 oe->count -= k;
4624 break;
4631 /* At this point all elements in the repeated factor vector have a
4632 remaining occurrence count of 0 or 1, and those with a count of 1
4633 don't have cached representatives. Re-sort the ops vector and
4634 clean up. */
4635 ops->qsort (sort_by_operand_rank);
4636 repeat_factor_vec.release ();
4638 /* Return the final product computed herein. Note that there may
4639 still be some elements with single occurrence count left in OPS;
4640 those will be handled by the normal reassociation logic. */
4641 return result;
4644 /* Attempt to optimize
4645 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4646 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
4648 static void
4649 attempt_builtin_copysign (vec<operand_entry *> *ops)
4651 operand_entry *oe;
4652 unsigned int i;
4653 unsigned int length = ops->length ();
4654 tree cst = ops->last ()->op;
4656 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4657 return;
4659 FOR_EACH_VEC_ELT (*ops, i, oe)
4661 if (TREE_CODE (oe->op) == SSA_NAME
4662 && has_single_use (oe->op))
4664 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4665 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4667 tree arg0, arg1;
4668 switch (gimple_call_combined_fn (old_call))
4670 CASE_CFN_COPYSIGN:
4671 arg0 = gimple_call_arg (old_call, 0);
4672 arg1 = gimple_call_arg (old_call, 1);
4673 /* The first argument of copysign must be a constant,
4674 otherwise there's nothing to do. */
4675 if (TREE_CODE (arg0) == REAL_CST)
4677 tree type = TREE_TYPE (arg0);
4678 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
4679 /* If we couldn't fold to a single constant, skip it.
4680 That happens e.g. for inexact multiplication when
4681 -frounding-math. */
4682 if (mul == NULL_TREE)
4683 break;
4684 /* Instead of adjusting OLD_CALL, let's build a new
4685 call to not leak the LHS and prevent keeping bogus
4686 debug statements. DCE will clean up the old call. */
4687 gcall *new_call;
4688 if (gimple_call_internal_p (old_call))
4689 new_call = gimple_build_call_internal
4690 (IFN_COPYSIGN, 2, mul, arg1);
4691 else
4692 new_call = gimple_build_call
4693 (gimple_call_fndecl (old_call), 2, mul, arg1);
4694 tree lhs = make_ssa_name (type);
4695 gimple_call_set_lhs (new_call, lhs);
4696 gimple_set_location (new_call,
4697 gimple_location (old_call));
4698 insert_stmt_after (new_call, old_call);
4699 /* We've used the constant, get rid of it. */
4700 ops->pop ();
4701 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4702 /* Handle the CST1 < 0 case by negating the result. */
4703 if (cst1_neg)
4705 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4706 gimple *negate_stmt
4707 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4708 insert_stmt_after (negate_stmt, new_call);
4709 oe->op = negrhs;
4711 else
4712 oe->op = lhs;
4713 if (dump_file && (dump_flags & TDF_DETAILS))
4715 fprintf (dump_file, "Optimizing copysign: ");
4716 print_generic_expr (dump_file, cst, 0);
4717 fprintf (dump_file, " * COPYSIGN (");
4718 print_generic_expr (dump_file, arg0, 0);
4719 fprintf (dump_file, ", ");
4720 print_generic_expr (dump_file, arg1, 0);
4721 fprintf (dump_file, ") into %sCOPYSIGN (",
4722 cst1_neg ? "-" : "");
4723 print_generic_expr (dump_file, mul, 0);
4724 fprintf (dump_file, ", ");
4725 print_generic_expr (dump_file, arg1, 0);
4726 fprintf (dump_file, "\n");
4728 return;
4730 break;
4731 default:
4732 break;
4739 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4741 static void
4742 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4744 tree rhs1;
4746 if (dump_file && (dump_flags & TDF_DETAILS))
4748 fprintf (dump_file, "Transforming ");
4749 print_gimple_stmt (dump_file, stmt, 0, 0);
4752 rhs1 = gimple_assign_rhs1 (stmt);
4753 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4754 update_stmt (stmt);
4755 remove_visited_stmt_chain (rhs1);
4757 if (dump_file && (dump_flags & TDF_DETAILS))
4759 fprintf (dump_file, " into ");
4760 print_gimple_stmt (dump_file, stmt, 0, 0);
4764 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4766 static void
4767 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4768 tree rhs1, tree rhs2)
4770 if (dump_file && (dump_flags & TDF_DETAILS))
4772 fprintf (dump_file, "Transforming ");
4773 print_gimple_stmt (dump_file, stmt, 0, 0);
4776 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4777 update_stmt (gsi_stmt (*gsi));
4778 remove_visited_stmt_chain (rhs1);
4780 if (dump_file && (dump_flags & TDF_DETAILS))
4782 fprintf (dump_file, " into ");
4783 print_gimple_stmt (dump_file, stmt, 0, 0);
4787 /* Reassociate expressions in basic block BB and its post-dominator as
4788 children.
4790 Bubble up return status from maybe_optimize_range_tests. */
4792 static bool
4793 reassociate_bb (basic_block bb)
4795 gimple_stmt_iterator gsi;
4796 basic_block son;
4797 gimple *stmt = last_stmt (bb);
4798 bool cfg_cleanup_needed = false;
4800 if (stmt && !gimple_visited_p (stmt))
4801 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
4803 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4805 stmt = gsi_stmt (gsi);
4807 if (is_gimple_assign (stmt)
4808 && !stmt_could_throw_p (stmt))
4810 tree lhs, rhs1, rhs2;
4811 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4813 /* If this is not a gimple binary expression, there is
4814 nothing for us to do with it. */
4815 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4816 continue;
4818 /* If this was part of an already processed statement,
4819 we don't need to touch it again. */
4820 if (gimple_visited_p (stmt))
4822 /* This statement might have become dead because of previous
4823 reassociations. */
4824 if (has_zero_uses (gimple_get_lhs (stmt)))
4826 reassoc_remove_stmt (&gsi);
4827 release_defs (stmt);
4828 /* We might end up removing the last stmt above which
4829 places the iterator to the end of the sequence.
4830 Reset it to the last stmt in this case which might
4831 be the end of the sequence as well if we removed
4832 the last statement of the sequence. In which case
4833 we need to bail out. */
4834 if (gsi_end_p (gsi))
4836 gsi = gsi_last_bb (bb);
4837 if (gsi_end_p (gsi))
4838 break;
4841 continue;
4844 lhs = gimple_assign_lhs (stmt);
4845 rhs1 = gimple_assign_rhs1 (stmt);
4846 rhs2 = gimple_assign_rhs2 (stmt);
4848 /* For non-bit or min/max operations we can't associate
4849 all types. Verify that here. */
4850 if (rhs_code != BIT_IOR_EXPR
4851 && rhs_code != BIT_AND_EXPR
4852 && rhs_code != BIT_XOR_EXPR
4853 && rhs_code != MIN_EXPR
4854 && rhs_code != MAX_EXPR
4855 && (!can_reassociate_p (lhs)
4856 || !can_reassociate_p (rhs1)
4857 || !can_reassociate_p (rhs2)))
4858 continue;
4860 if (associative_tree_code (rhs_code))
4862 auto_vec<operand_entry *> ops;
4863 tree powi_result = NULL_TREE;
4865 /* There may be no immediate uses left by the time we
4866 get here because we may have eliminated them all. */
4867 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4868 continue;
4870 gimple_set_visited (stmt, true);
4871 linearize_expr_tree (&ops, stmt, true, true);
4872 ops.qsort (sort_by_operand_rank);
4873 optimize_ops_list (rhs_code, &ops);
4874 if (undistribute_ops_list (rhs_code, &ops,
4875 loop_containing_stmt (stmt)))
4877 ops.qsort (sort_by_operand_rank);
4878 optimize_ops_list (rhs_code, &ops);
4881 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4882 optimize_range_tests (rhs_code, &ops);
4884 if (rhs_code == MULT_EXPR)
4885 attempt_builtin_copysign (&ops);
4887 if (reassoc_insert_powi_p
4888 && rhs_code == MULT_EXPR
4889 && flag_unsafe_math_optimizations)
4890 powi_result = attempt_builtin_powi (stmt, &ops);
4892 /* If the operand vector is now empty, all operands were
4893 consumed by the __builtin_powi optimization. */
4894 if (ops.length () == 0)
4895 transform_stmt_to_copy (&gsi, stmt, powi_result);
4896 else if (ops.length () == 1)
4898 tree last_op = ops.last ()->op;
4900 if (powi_result)
4901 transform_stmt_to_multiply (&gsi, stmt, last_op,
4902 powi_result);
4903 else
4904 transform_stmt_to_copy (&gsi, stmt, last_op);
4906 else
4908 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4909 int ops_num = ops.length ();
4910 int width = get_reassociation_width (ops_num, rhs_code, mode);
4911 tree new_lhs = lhs;
4913 if (dump_file && (dump_flags & TDF_DETAILS))
4914 fprintf (dump_file,
4915 "Width = %d was chosen for reassociation\n", width);
4917 if (width > 1
4918 && ops.length () > 3)
4919 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4920 width, ops);
4921 else
4923 /* When there are three operands left, we want
4924 to make sure the ones that get the double
4925 binary op are chosen wisely. */
4926 int len = ops.length ();
4927 if (len >= 3)
4928 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4930 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4931 powi_result != NULL);
4934 /* If we combined some repeated factors into a
4935 __builtin_powi call, multiply that result by the
4936 reassociated operands. */
4937 if (powi_result)
4939 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4940 tree type = TREE_TYPE (lhs);
4941 tree target_ssa = make_temp_ssa_name (type, NULL,
4942 "reassocpow");
4943 gimple_set_lhs (lhs_stmt, target_ssa);
4944 update_stmt (lhs_stmt);
4945 if (lhs != new_lhs)
4946 target_ssa = new_lhs;
4947 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4948 powi_result, target_ssa);
4949 gimple_set_location (mul_stmt, gimple_location (stmt));
4950 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4951 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4957 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4958 son;
4959 son = next_dom_son (CDI_POST_DOMINATORS, son))
4960 cfg_cleanup_needed |= reassociate_bb (son);
4962 return cfg_cleanup_needed;
4965 /* Add jumps around shifts for range tests turned into bit tests.
4966 For each SSA_NAME VAR we have code like:
4967 VAR = ...; // final stmt of range comparison
4968 // bit test here...;
4969 OTHERVAR = ...; // final stmt of the bit test sequence
4970 RES = VAR | OTHERVAR;
4971 Turn the above into:
4972 VAR = ...;
4973 if (VAR != 0)
4974 goto <l3>;
4975 else
4976 goto <l2>;
4977 <l2>:
4978 // bit test here...;
4979 OTHERVAR = ...;
4980 <l3>:
4981 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4983 static void
4984 branch_fixup (void)
4986 tree var;
4987 unsigned int i;
4989 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4991 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4992 gimple *use_stmt;
4993 use_operand_p use;
4994 bool ok = single_imm_use (var, &use, &use_stmt);
4995 gcc_assert (ok
4996 && is_gimple_assign (use_stmt)
4997 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4998 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5000 basic_block cond_bb = gimple_bb (def_stmt);
5001 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5002 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5004 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5005 gimple *g = gimple_build_cond (NE_EXPR, var,
5006 build_zero_cst (TREE_TYPE (var)),
5007 NULL_TREE, NULL_TREE);
5008 location_t loc = gimple_location (use_stmt);
5009 gimple_set_location (g, loc);
5010 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5012 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5013 etrue->probability = REG_BR_PROB_BASE / 2;
5014 etrue->count = cond_bb->count / 2;
5015 edge efalse = find_edge (cond_bb, then_bb);
5016 efalse->flags = EDGE_FALSE_VALUE;
5017 efalse->probability -= etrue->probability;
5018 efalse->count -= etrue->count;
5019 then_bb->count -= etrue->count;
5021 tree othervar = NULL_TREE;
5022 if (gimple_assign_rhs1 (use_stmt) == var)
5023 othervar = gimple_assign_rhs2 (use_stmt);
5024 else if (gimple_assign_rhs2 (use_stmt) == var)
5025 othervar = gimple_assign_rhs1 (use_stmt);
5026 else
5027 gcc_unreachable ();
5028 tree lhs = gimple_assign_lhs (use_stmt);
5029 gphi *phi = create_phi_node (lhs, merge_bb);
5030 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5031 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5032 gsi = gsi_for_stmt (use_stmt);
5033 gsi_remove (&gsi, true);
5035 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5036 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5038 reassoc_branch_fixups.release ();
5041 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5042 void debug_ops_vector (vec<operand_entry *> ops);
5044 /* Dump the operand entry vector OPS to FILE. */
5046 void
5047 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5049 operand_entry *oe;
5050 unsigned int i;
5052 FOR_EACH_VEC_ELT (ops, i, oe)
5054 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5055 print_generic_expr (file, oe->op, 0);
5056 fprintf (file, "\n");
5060 /* Dump the operand entry vector OPS to STDERR. */
5062 DEBUG_FUNCTION void
5063 debug_ops_vector (vec<operand_entry *> ops)
5065 dump_ops_vector (stderr, ops);
5068 /* Bubble up return status from reassociate_bb. */
5070 static bool
5071 do_reassoc (void)
5073 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5074 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5077 /* Initialize the reassociation pass. */
5079 static void
5080 init_reassoc (void)
5082 int i;
5083 long rank = 2;
5084 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5086 /* Find the loops, so that we can prevent moving calculations in
5087 them. */
5088 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5090 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5092 next_operand_entry_id = 0;
5094 /* Reverse RPO (Reverse Post Order) will give us something where
5095 deeper loops come later. */
5096 pre_and_rev_post_order_compute (NULL, bbs, false);
5097 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5098 operand_rank = new hash_map<tree, long>;
5100 /* Give each default definition a distinct rank. This includes
5101 parameters and the static chain. Walk backwards over all
5102 SSA names so that we get proper rank ordering according
5103 to tree_swap_operands_p. */
5104 for (i = num_ssa_names - 1; i > 0; --i)
5106 tree name = ssa_name (i);
5107 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5108 insert_operand_rank (name, ++rank);
5111 /* Set up rank for each BB */
5112 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5113 bb_rank[bbs[i]] = ++rank << 16;
5115 free (bbs);
5116 calculate_dominance_info (CDI_POST_DOMINATORS);
5117 plus_negates = vNULL;
5120 /* Cleanup after the reassociation pass, and print stats if
5121 requested. */
5123 static void
5124 fini_reassoc (void)
5126 statistics_counter_event (cfun, "Linearized",
5127 reassociate_stats.linearized);
5128 statistics_counter_event (cfun, "Constants eliminated",
5129 reassociate_stats.constants_eliminated);
5130 statistics_counter_event (cfun, "Ops eliminated",
5131 reassociate_stats.ops_eliminated);
5132 statistics_counter_event (cfun, "Statements rewritten",
5133 reassociate_stats.rewritten);
5134 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5135 reassociate_stats.pows_encountered);
5136 statistics_counter_event (cfun, "Built-in powi calls created",
5137 reassociate_stats.pows_created);
5139 delete operand_rank;
5140 operand_entry_pool.release ();
5141 free (bb_rank);
5142 plus_negates.release ();
5143 free_dominance_info (CDI_POST_DOMINATORS);
5144 loop_optimizer_finalize ();
5147 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5148 insertion of __builtin_powi calls.
5150 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5151 optimization of a gimple conditional. Otherwise returns zero. */
5153 static unsigned int
5154 execute_reassoc (bool insert_powi_p)
5156 reassoc_insert_powi_p = insert_powi_p;
5158 init_reassoc ();
5160 bool cfg_cleanup_needed;
5161 cfg_cleanup_needed = do_reassoc ();
5162 repropagate_negates ();
5163 branch_fixup ();
5165 fini_reassoc ();
5166 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5169 namespace {
5171 const pass_data pass_data_reassoc =
5173 GIMPLE_PASS, /* type */
5174 "reassoc", /* name */
5175 OPTGROUP_NONE, /* optinfo_flags */
5176 TV_TREE_REASSOC, /* tv_id */
5177 ( PROP_cfg | PROP_ssa ), /* properties_required */
5178 0, /* properties_provided */
5179 0, /* properties_destroyed */
5180 0, /* todo_flags_start */
5181 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5184 class pass_reassoc : public gimple_opt_pass
5186 public:
5187 pass_reassoc (gcc::context *ctxt)
5188 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5191 /* opt_pass methods: */
5192 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5193 void set_pass_param (unsigned int n, bool param)
5195 gcc_assert (n == 0);
5196 insert_powi_p = param;
5198 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5199 virtual unsigned int execute (function *)
5200 { return execute_reassoc (insert_powi_p); }
5202 private:
5203 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5204 point 3a in the pass header comment. */
5205 bool insert_powi_p;
5206 }; // class pass_reassoc
5208 } // anon namespace
5210 gimple_opt_pass *
5211 make_pass_reassoc (gcc::context *ctxt)
5213 return new pass_reassoc (ctxt);