gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "rtl.h"
29 #include "ssa.h"
30 #include "tm_p.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "cfganal.h"
35 #include "gimple-pretty-print.h"
36 #include "tree-inline.h"
37 #include "internal-fn.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "tree-cfg.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "flags.h"
46 #include "tree-dfa.h"
47 #include "tree-ssa.h"
48 #include "tree-iterator.h"
49 #include "tree-pass.h"
50 #include "alloc-pool.h"
51 #include "langhooks.h"
52 #include "cfgloop.h"
53 #include "target.h"
54 #include "params.h"
55 #include "diagnostic-core.h"
56 #include "builtins.h"
57 #include "gimplify.h"
58 #include "insn-codes.h"
59 #include "optabs-tree.h"
61 /* This is a simple global reassociation pass. It is, in part, based
62 on the LLVM pass of the same name (they do some things more/less
63 than we do, in different orders, etc).
65 It consists of five steps:
67 1. Breaking up subtract operations into addition + negate, where
68 it would promote the reassociation of adds.
70 2. Left linearization of the expression trees, so that (A+B)+(C+D)
71 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
72 During linearization, we place the operands of the binary
73 expressions into a vector of operand_entry_t
75 3. Optimization of the operand lists, eliminating things like a +
76 -a, a & a, etc.
78 3a. Combine repeated factors with the same occurrence counts
79 into a __builtin_powi call that will later be optimized into
80 an optimal number of multiplies.
82 4. Rewrite the expression trees we linearized and optimized so
83 they are in proper rank order.
85 5. Repropagate negates, as nothing else will clean it up ATM.
87 A bit of theory on #4, since nobody seems to write anything down
88 about why it makes sense to do it the way they do it:
90 We could do this much nicer theoretically, but don't (for reasons
91 explained after how to do it theoretically nice :P).
93 In order to promote the most redundancy elimination, you want
94 binary expressions whose operands are the same rank (or
95 preferably, the same value) exposed to the redundancy eliminator,
96 for possible elimination.
98 So the way to do this if we really cared, is to build the new op
99 tree from the leaves to the roots, merging as you go, and putting the
100 new op on the end of the worklist, until you are left with one
101 thing on the worklist.
103 IE if you have to rewrite the following set of operands (listed with
104 rank in parentheses), with opcode PLUS_EXPR:
106 a (1), b (1), c (1), d (2), e (2)
109 We start with our merge worklist empty, and the ops list with all of
110 those on it.
112 You want to first merge all leaves of the same rank, as much as
113 possible.
115 So first build a binary op of
117 mergetmp = a + b, and put "mergetmp" on the merge worklist.
119 Because there is no three operand form of PLUS_EXPR, c is not going to
120 be exposed to redundancy elimination as a rank 1 operand.
122 So you might as well throw it on the merge worklist (you could also
123 consider it to now be a rank two operand, and merge it with d and e,
124 but in this case, you then have evicted e from a binary op. So at
125 least in this situation, you can't win.)
127 Then build a binary op of d + e
128 mergetmp2 = d + e
130 and put mergetmp2 on the merge worklist.
132 so merge worklist = {mergetmp, c, mergetmp2}
134 Continue building binary ops of these operations until you have only
135 one operation left on the worklist.
137 So we have
139 build binary op
140 mergetmp3 = mergetmp + c
142 worklist = {mergetmp2, mergetmp3}
144 mergetmp4 = mergetmp2 + mergetmp3
146 worklist = {mergetmp4}
148 because we have one operation left, we can now just set the original
149 statement equal to the result of that operation.
151 This will at least expose a + b and d + e to redundancy elimination
152 as binary operations.
154 For extra points, you can reuse the old statements to build the
155 mergetmps, since you shouldn't run out.
157 So why don't we do this?
159 Because it's expensive, and rarely will help. Most trees we are
160 reassociating have 3 or fewer ops. If they have 2 ops, they already
161 will be written into a nice single binary op. If you have 3 ops, a
162 single simple check suffices to tell you whether the first two are of the
163 same rank. If so, you know to order it
165 mergetmp = op1 + op2
166 newstmt = mergetmp + op3
168 instead of
169 mergetmp = op2 + op3
170 newstmt = mergetmp + op1
172 If all three are of the same rank, you can't expose them all in a
173 single binary operator anyway, so the above is *still* the best you
174 can do.
176 Thus, this is what we do. When we have three ops left, we check to see
177 what order to put them in, and call it a day. As a nod to vector sum
178 reduction, we check if any of the ops are really a phi node that is a
179 destructive update for the associating op, and keep the destructive
180 update together for vector sum reduction recognition. */
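/* A compact illustration of steps 2-4 above (an explanatory sketch,
   not part of the pass).  Given source like

     t = (a + b) + (c + d);

   linearization collects the operand list {a, b, c, d}, the list
   optimizations run (nothing cancels here), and the rewrite step
   emits the left-linear, rank-ordered chain

     t1 = a + b;
     t2 = t1 + c;
     t  = t2 + d;

   exposing a + b to the redundancy eliminator if it also appears
   elsewhere.  */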
183 /* Statistics */
184 static struct
186 int linearized;
187 int constants_eliminated;
188 int ops_eliminated;
189 int rewritten;
190 int pows_encountered;
191 int pows_created;
192 } reassociate_stats;
194 /* Operator, rank pair. */
195 typedef struct operand_entry
197 unsigned int rank;
198 int id;
199 tree op;
200 unsigned int count;
201 } *operand_entry_t;
203 static object_allocator<operand_entry> operand_entry_pool
204 ("operand entry pool");
206 /* This is used to assign a unique ID to each struct operand_entry
207 so that qsort results are identical on different hosts. */
208 static int next_operand_entry_id;
210 /* Starting rank number for a given basic block, so that we can rank
211 operations using unmovable instructions in that BB based on the bb
212 depth. */
213 static long *bb_rank;
215 /* Operand->rank hashtable. */
216 static hash_map<tree, long> *operand_rank;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted - the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static long get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
263 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
264 an innermost loop, and the phi has only a single use which is inside
265 the loop, then the rank is the block rank of the loop latch plus an
266 extra bias for the loop-carried dependence. This causes expressions
267 calculated into an accumulator variable to be independent for each
268 iteration of the loop. If STMT is some other phi, the rank is the
269 block rank of its containing block. */
270 static long
271 phi_rank (gimple *stmt)
273 basic_block bb = gimple_bb (stmt);
274 struct loop *father = bb->loop_father;
275 tree res;
276 unsigned i;
277 use_operand_p use;
278 gimple *use_stmt;
280 /* We only care about real loops (those with a latch). */
281 if (!father->latch)
282 return bb_rank[bb->index];
284 /* Interesting phis must be in headers of innermost loops. */
285 if (bb != father->header
286 || father->inner)
287 return bb_rank[bb->index];
289 /* Ignore virtual SSA_NAMEs. */
290 res = gimple_phi_result (stmt);
291 if (virtual_operand_p (res))
292 return bb_rank[bb->index];
294 /* The phi definition must have a single use, and that use must be
295 within the loop. Otherwise this isn't an accumulator pattern. */
296 if (!single_imm_use (res, &use, &use_stmt)
297 || gimple_bb (use_stmt)->loop_father != father)
298 return bb_rank[bb->index];
300 /* Look for phi arguments from within the loop. If found, bias this phi. */
301 for (i = 0; i < gimple_phi_num_args (stmt); i++)
303 tree arg = gimple_phi_arg_def (stmt, i);
304 if (TREE_CODE (arg) == SSA_NAME
305 && !SSA_NAME_IS_DEFAULT_DEF (arg))
307 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
308 if (gimple_bb (def_stmt)->loop_father == father)
309 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
313 /* Must be an uninteresting phi. */
314 return bb_rank[bb->index];
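/* For example (an illustrative sketch, not a statement about exact
   rank values): if the latch block of an innermost loop has rank R,
   a loop-carried accumulator phi in that loop's header is given rank
   R + PHI_LOOP_BIAS, i.e. R + 32768.  That places the phi result
   above anything computed by a reassociation chain inside the loop
   body, so it sorts to the end of the operand list and is added
   last, which gives the per-iteration independence described in
   get_rank below.  */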
317 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
318 loop-carried dependence of an innermost loop, return TRUE; else
319 return FALSE. */
320 static bool
321 loop_carried_phi (tree exp)
323 gimple *phi_stmt;
324 long block_rank;
326 if (TREE_CODE (exp) != SSA_NAME
327 || SSA_NAME_IS_DEFAULT_DEF (exp))
328 return false;
330 phi_stmt = SSA_NAME_DEF_STMT (exp);
332 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
333 return false;
335 /* Non-loop-carried phis have block rank. Loop-carried phis have
336 an additional bias added in. If this phi doesn't have block rank,
337 it's biased and should not be propagated. */
338 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
340 if (phi_rank (phi_stmt) != block_rank)
341 return true;
343 return false;
346 /* Return the maximum of RANK and the rank that should be propagated
347 from expression OP. For most operands, this is just the rank of OP.
348 For loop-carried phis, the value is zero to avoid undoing the bias
349 in favor of the phi. */
350 static long
351 propagate_rank (long rank, tree op)
353 long op_rank;
355 if (loop_carried_phi (op))
356 return rank;
358 op_rank = get_rank (op);
360 return MAX (rank, op_rank);
363 /* Look up the operand rank structure for expression E. */
365 static inline long
366 find_operand_rank (tree e)
368 long *slot = operand_rank->get (e);
369 return slot ? *slot : -1;
372 /* Insert {E,RANK} into the operand rank hashtable. */
374 static inline void
375 insert_operand_rank (tree e, long rank)
377 gcc_assert (rank > 0);
378 gcc_assert (!operand_rank->put (e, rank));
381 /* Given an expression E, return the rank of the expression. */
383 static long
384 get_rank (tree e)
386 /* SSA_NAME's have the rank of the expression they are the result of.
388 For globals and uninitialized values, the rank is 0.
389 For function arguments, use the pre-setup rank.
390 For PHI nodes, stores, asm statements, etc, we use the rank of
391 the BB.
392 For simple operations, the rank is the maximum rank of any of
393 its operands, or the bb_rank, whichever is less.
394 I make no claims that this is optimal, however, it gives good
395 results. */
397 /* We make an exception to the normal ranking system to break
398 dependences of accumulator variables in loops. Suppose we
399 have a simple one-block loop containing:
401 x_1 = phi(x_0, x_2)
402 b = a + x_1
403 c = b + d
404 x_2 = c + e
406 As shown, each iteration of the calculation into x is fully
407 dependent upon the iteration before it. We would prefer to
408 see this in the form:
410 x_1 = phi(x_0, x_2)
411 b = a + d
412 c = b + e
413 x_2 = c + x_1
415 If the loop is unrolled, the calculations of b and c from
416 different iterations can be interleaved.
418 To obtain this result during reassociation, we bias the rank
419 of the phi definition x_1 upward, when it is recognized as an
420 accumulator pattern. The artificial rank causes it to be
421 added last, providing the desired independence. */
423 if (TREE_CODE (e) == SSA_NAME)
425 ssa_op_iter iter;
426 gimple *stmt;
427 long rank;
428 tree op;
430 if (SSA_NAME_IS_DEFAULT_DEF (e))
431 return find_operand_rank (e);
433 stmt = SSA_NAME_DEF_STMT (e);
434 if (gimple_code (stmt) == GIMPLE_PHI)
435 return phi_rank (stmt);
437 if (!is_gimple_assign (stmt))
438 return bb_rank[gimple_bb (stmt)->index];
440 /* If we already have a rank for this expression, use that. */
441 rank = find_operand_rank (e);
442 if (rank != -1)
443 return rank;
445 /* Otherwise, find the maximum rank for the operands. As an
446 exception, remove the bias from loop-carried phis when propagating
447 the rank so that dependent operations are not also biased. */
448 /* Simply walk over all SSA uses - this takes advantage of the
449 fact that non-SSA operands are is_gimple_min_invariant and
450 thus have rank 0. */
451 rank = 0;
452 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
453 rank = propagate_rank (rank, op);
455 if (dump_file && (dump_flags & TDF_DETAILS))
457 fprintf (dump_file, "Rank for ");
458 print_generic_expr (dump_file, e, 0);
459 fprintf (dump_file, " is %ld\n", (rank + 1));
462 /* Note the rank in the hashtable so we don't recompute it. */
463 insert_operand_rank (e, (rank + 1));
464 return (rank + 1);
467 /* Constants, globals, etc., are rank 0 */
468 return 0;
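/* A worked example of the ranking above (illustrative only; assume
   the pass setup gave the parameters a, b and c rank 1).  For

     x = a + b;
     y = x + c;

   get_rank (x) = MAX (1, 1) + 1 = 2 and get_rank (y) = MAX (2, 1) + 1
   = 3, while any constant operand keeps rank 0, so in a later sort by
   decreasing rank y comes first and constants come last.  */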
472 /* We want integer ones to end up last no matter what, since they are
473 the ones we can do the most with. */
474 #define INTEGER_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 return FLOAT_CONST_TYPE;
487 else
488 return OTHER_CONST_TYPE;
491 /* qsort comparison function to sort operand entries PA and PB by rank
492 so that the sorted array is ordered by rank in decreasing order. */
493 static int
494 sort_by_operand_rank (const void *pa, const void *pb)
496 const operand_entry_t oea = *(const operand_entry_t *)pa;
497 const operand_entry_t oeb = *(const operand_entry_t *)pb;
499 /* It's nicer for optimize_expression if constants that are likely
500 to fold when added/multiplied/whatever are put next to each
501 other. Since all constants have rank 0, order them by type. */
502 if (oeb->rank == 0 && oea->rank == 0)
504 if (constant_type (oeb->op) != constant_type (oea->op))
505 return constant_type (oeb->op) - constant_type (oea->op);
506 else
507 /* To make sorting result stable, we use unique IDs to determine
508 order. */
509 return oeb->id - oea->id;
512 /* Lastly, make sure the versions that are the same go next to each
513 other. */
514 if ((oeb->rank - oea->rank == 0)
515 && TREE_CODE (oea->op) == SSA_NAME
516 && TREE_CODE (oeb->op) == SSA_NAME)
518 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
519 versions of removed SSA_NAMEs, so if possible prefer to sort
520 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
521 See PR60418. */
522 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
523 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
561 operand_entry_t oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 ops->safe_push (oe);
570 /* Add an operand entry to *OPS for the tree operand OP with repeat
571 count REPEAT. */
573 static void
574 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
575 HOST_WIDE_INT repeat)
577 operand_entry_t oe = operand_entry_pool.allocate ();
579 oe->op = op;
580 oe->rank = get_rank (op);
581 oe->id = next_operand_entry_id++;
582 oe->count = repeat;
583 ops->safe_push (oe);
585 reassociate_stats.pows_encountered++;
588 /* Return true if STMT is a reassociable operation containing a binary
589 operation with tree code CODE, and is inside LOOP. */
591 static bool
592 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
594 basic_block bb = gimple_bb (stmt);
596 if (gimple_bb (stmt) == NULL)
597 return false;
599 if (!flow_bb_inside_loop_p (loop, bb))
600 return false;
602 if (is_gimple_assign (stmt)
603 && gimple_assign_rhs_code (stmt) == code
604 && has_single_use (gimple_assign_lhs (stmt)))
605 return true;
607 return false;
611 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
612 operand of that operation. Otherwise, return NULL. */
614 static tree
615 get_unary_op (tree name, enum tree_code opcode)
617 gimple *stmt = SSA_NAME_DEF_STMT (name);
619 if (!is_gimple_assign (stmt))
620 return NULL_TREE;
622 if (gimple_assign_rhs_code (stmt) == opcode)
623 return gimple_assign_rhs1 (stmt);
624 return NULL_TREE;
627 /* If CURR and LAST are a pair of ops that OPCODE allows us to
628 eliminate through equivalences, do so, remove them from OPS, and
629 return true. Otherwise, return false. */
631 static bool
632 eliminate_duplicate_pair (enum tree_code opcode,
633 vec<operand_entry_t> *ops,
634 bool *all_done,
635 unsigned int i,
636 operand_entry_t curr,
637 operand_entry_t last)
640 /* If we have two of the same op, and the opcode is &, |, min, or max,
641 we can eliminate one of them.
642 If we have two of the same op, and the opcode is ^, we can
643 eliminate both of them. */
645 if (last && last->op == curr->op)
647 switch (opcode)
649 case MAX_EXPR:
650 case MIN_EXPR:
651 case BIT_IOR_EXPR:
652 case BIT_AND_EXPR:
653 if (dump_file && (dump_flags & TDF_DETAILS))
655 fprintf (dump_file, "Equivalence: ");
656 print_generic_expr (dump_file, curr->op, 0);
657 fprintf (dump_file, " [&|minmax] ");
658 print_generic_expr (dump_file, last->op, 0);
659 fprintf (dump_file, " -> ");
660 print_generic_stmt (dump_file, last->op, 0);
663 ops->ordered_remove (i);
664 reassociate_stats.ops_eliminated ++;
666 return true;
668 case BIT_XOR_EXPR:
669 if (dump_file && (dump_flags & TDF_DETAILS))
671 fprintf (dump_file, "Equivalence: ");
672 print_generic_expr (dump_file, curr->op, 0);
673 fprintf (dump_file, " ^ ");
674 print_generic_expr (dump_file, last->op, 0);
675 fprintf (dump_file, " -> nothing\n");
678 reassociate_stats.ops_eliminated += 2;
680 if (ops->length () == 2)
682 ops->create (0);
683 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
684 *all_done = true;
686 else
688 ops->ordered_remove (i-1);
689 ops->ordered_remove (i-1);
692 return true;
694 default:
695 break;
698 return false;
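/* Two small examples of the above (illustrative only): for the
   operand list of b | a | b the duplicate pair is collapsed via
   b | b == b, leaving b | a; for b ^ a ^ b both copies of b cancel
   and only a survives (and if the two duplicates were the only
   operands, a zero of the right type is pushed instead).  */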
701 static vec<tree> plus_negates;
703 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
704 expression, look in OPS for a corresponding positive operation to cancel
705 it out. If we find one, remove the other from OPS, replace
706 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
707 return false. */
709 static bool
710 eliminate_plus_minus_pair (enum tree_code opcode,
711 vec<operand_entry_t> *ops,
712 unsigned int currindex,
713 operand_entry_t curr)
715 tree negateop;
716 tree notop;
717 unsigned int i;
718 operand_entry_t oe;
720 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
721 return false;
723 negateop = get_unary_op (curr->op, NEGATE_EXPR);
724 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
725 if (negateop == NULL_TREE && notop == NULL_TREE)
726 return false;
728 /* Any non-negated version will have a rank that is one less than
729 the current rank. So once we hit those ranks, if we don't find
730 one, we can stop. */
732 for (i = currindex + 1;
733 ops->iterate (i, &oe)
734 && oe->rank >= curr->rank - 1 ;
735 i++)
737 if (oe->op == negateop)
740 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "Equivalence: ");
743 print_generic_expr (dump_file, negateop, 0);
744 fprintf (dump_file, " + -");
745 print_generic_expr (dump_file, oe->op, 0);
746 fprintf (dump_file, " -> 0\n");
749 ops->ordered_remove (i);
750 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
751 ops->ordered_remove (currindex);
752 reassociate_stats.ops_eliminated ++;
754 return true;
756 else if (oe->op == notop)
758 tree op_type = TREE_TYPE (oe->op);
760 if (dump_file && (dump_flags & TDF_DETAILS))
762 fprintf (dump_file, "Equivalence: ");
763 print_generic_expr (dump_file, notop, 0);
764 fprintf (dump_file, " + ~");
765 print_generic_expr (dump_file, oe->op, 0);
766 fprintf (dump_file, " -> -1\n");
769 ops->ordered_remove (i);
770 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
771 ops->ordered_remove (currindex);
772 reassociate_stats.ops_eliminated ++;
774 return true;
778 /* CURR->OP is a negate expr in a plus expr: save it for later
779 inspection in repropagate_negates(). */
780 if (negateop != NULL_TREE)
781 plus_negates.safe_push (curr->op);
783 return false;
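/* For instance (illustrative only): in the chain x = a + c + -b + b
   the negate cancels against b and the list becomes {a, c, 0}, while
   in x = a + ~b + b the pair ~b + b is replaced by -1, matching the
   two's complement identity b + ~b == -1.  */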
786 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
787 bitwise not expression, look in OPS for a corresponding operand to
788 cancel it out. If we find one, remove the other from OPS, replace
789 OPS[CURRINDEX] with 0, and return true. Otherwise, return
790 false. */
792 static bool
793 eliminate_not_pairs (enum tree_code opcode,
794 vec<operand_entry_t> *ops,
795 unsigned int currindex,
796 operand_entry_t curr)
798 tree notop;
799 unsigned int i;
800 operand_entry_t oe;
802 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
803 || TREE_CODE (curr->op) != SSA_NAME)
804 return false;
806 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
807 if (notop == NULL_TREE)
808 return false;
810 /* Any non-not version will have a rank that is one less than
811 the current rank. So once we hit those ranks, if we don't find
812 one, we can stop. */
814 for (i = currindex + 1;
815 ops->iterate (i, &oe)
816 && oe->rank >= curr->rank - 1;
817 i++)
819 if (oe->op == notop)
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, notop, 0);
825 if (opcode == BIT_AND_EXPR)
826 fprintf (dump_file, " & ~");
827 else if (opcode == BIT_IOR_EXPR)
828 fprintf (dump_file, " | ~");
829 print_generic_expr (dump_file, oe->op, 0);
830 if (opcode == BIT_AND_EXPR)
831 fprintf (dump_file, " -> 0\n");
832 else if (opcode == BIT_IOR_EXPR)
833 fprintf (dump_file, " -> -1\n");
836 if (opcode == BIT_AND_EXPR)
837 oe->op = build_zero_cst (TREE_TYPE (oe->op));
838 else if (opcode == BIT_IOR_EXPR)
839 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
841 reassociate_stats.ops_eliminated += ops->length () - 1;
842 ops->truncate (0);
843 ops->quick_push (oe);
844 return true;
848 return false;
851 /* Use constant value that may be present in OPS to try to eliminate
852 operands. Note that this function is only really used when we've
853 eliminated ops for other reasons, or merged constants. Across
854 single statements, fold already does all of this, plus more. There
855 is little point in duplicating logic, so I've only included the
856 identities that I could ever construct testcases to trigger. */
858 static void
859 eliminate_using_constants (enum tree_code opcode,
860 vec<operand_entry_t> *ops)
862 operand_entry_t oelast = ops->last ();
863 tree type = TREE_TYPE (oelast->op);
865 if (oelast->rank == 0
866 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
868 switch (opcode)
870 case BIT_AND_EXPR:
871 if (integer_zerop (oelast->op))
873 if (ops->length () != 1)
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (dump_file, "Found & 0, removing all other ops\n");
878 reassociate_stats.ops_eliminated += ops->length () - 1;
880 ops->truncate (0);
881 ops->quick_push (oelast);
882 return;
885 else if (integer_all_onesp (oelast->op))
887 if (ops->length () != 1)
889 if (dump_file && (dump_flags & TDF_DETAILS))
890 fprintf (dump_file, "Found & -1, removing\n");
891 ops->pop ();
892 reassociate_stats.ops_eliminated++;
895 break;
896 case BIT_IOR_EXPR:
897 if (integer_all_onesp (oelast->op))
899 if (ops->length () != 1)
901 if (dump_file && (dump_flags & TDF_DETAILS))
902 fprintf (dump_file, "Found | -1, removing all other ops\n");
904 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (0);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_zerop (oelast->op))
913 if (ops->length () != 1)
915 if (dump_file && (dump_flags & TDF_DETAILS))
916 fprintf (dump_file, "Found | 0, removing\n");
917 ops->pop ();
918 reassociate_stats.ops_eliminated++;
921 break;
922 case MULT_EXPR:
923 if (integer_zerop (oelast->op)
924 || (FLOAT_TYPE_P (type)
925 && !HONOR_NANS (type)
926 && !HONOR_SIGNED_ZEROS (type)
927 && real_zerop (oelast->op)))
929 if (ops->length () != 1)
931 if (dump_file && (dump_flags & TDF_DETAILS))
932 fprintf (dump_file, "Found * 0, removing all other ops\n");
934 reassociate_stats.ops_eliminated += ops->length () - 1;
935 ops->truncate (1);
936 ops->quick_push (oelast);
937 return;
940 else if (integer_onep (oelast->op)
941 || (FLOAT_TYPE_P (type)
942 && !HONOR_SNANS (type)
943 && real_onep (oelast->op)))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found * 1, removing\n");
949 ops->pop ();
950 reassociate_stats.ops_eliminated++;
951 return;
954 break;
955 case BIT_XOR_EXPR:
956 case PLUS_EXPR:
957 case MINUS_EXPR:
958 if (integer_zerop (oelast->op)
959 || (FLOAT_TYPE_P (type)
960 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
961 && fold_real_zero_addition_p (type, oelast->op,
962 opcode == MINUS_EXPR)))
964 if (ops->length () != 1)
966 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (dump_file, "Found [|^+] 0, removing\n");
968 ops->pop ();
969 reassociate_stats.ops_eliminated++;
970 return;
973 break;
974 default:
975 break;
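/* Examples of the constant identities handled above (illustrative
   only): a & b & 0 collapses to just 0, a | b | -1 collapses to -1,
   a * b * 1 simply drops the 1, and a + b + 0 drops the 0; the
   floating-point variants are only used when the checks above show
   that NaNs, signed zeros or signaling NaNs do not get in the way.  */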
981 static void linearize_expr_tree (vec<operand_entry_t> *, gimple *,
982 bool, bool);
984 /* Structure for tracking and counting operands. */
985 struct oecount {
986 int cnt;
987 int id;
988 enum tree_code oecode;
989 tree op;
993 /* The heap for the oecount hashtable and the sorted list of operands. */
994 static vec<oecount> cvec;
997 /* Oecount hashtable helpers. */
999 struct oecount_hasher : int_hash <int, 0, 1>
1001 static inline hashval_t hash (int);
1002 static inline bool equal (int, int);
1005 /* Hash function for oecount. */
1007 inline hashval_t
1008 oecount_hasher::hash (int p)
1010 const oecount *c = &cvec[p - 42];
1011 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1014 /* Comparison function for oecount. */
1016 inline bool
1017 oecount_hasher::equal (int p1, int p2)
1019 const oecount *c1 = &cvec[p1 - 42];
1020 const oecount *c2 = &cvec[p2 - 42];
1021 return (c1->oecode == c2->oecode
1022 && c1->op == c2->op);
1025 /* Comparison function for qsort sorting oecount elements by count. */
1027 static int
1028 oecount_cmp (const void *p1, const void *p2)
1030 const oecount *c1 = (const oecount *)p1;
1031 const oecount *c2 = (const oecount *)p2;
1032 if (c1->cnt != c2->cnt)
1033 return c1->cnt - c2->cnt;
1034 else
1035 /* If counts are identical, use unique IDs to stabilize qsort. */
1036 return c1->id - c2->id;
1039 /* Return TRUE iff STMT represents a builtin call that raises OP
1040 to some exponent. */
1042 static bool
1043 stmt_is_power_of_op (gimple *stmt, tree op)
1045 tree fndecl;
1047 if (!is_gimple_call (stmt))
1048 return false;
1050 fndecl = gimple_call_fndecl (stmt);
1052 if (!fndecl
1053 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1054 return false;
1056 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1058 CASE_FLT_FN (BUILT_IN_POW):
1059 CASE_FLT_FN (BUILT_IN_POWI):
1060 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1062 default:
1063 return false;
1067 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1068 in place and return the result. Assumes that stmt_is_power_of_op
1069 was previously called for STMT and returned TRUE. */
1071 static HOST_WIDE_INT
1072 decrement_power (gimple *stmt)
1074 REAL_VALUE_TYPE c, cint;
1075 HOST_WIDE_INT power;
1076 tree arg1;
1078 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1080 CASE_FLT_FN (BUILT_IN_POW):
1081 arg1 = gimple_call_arg (stmt, 1);
1082 c = TREE_REAL_CST (arg1);
1083 power = real_to_integer (&c) - 1;
1084 real_from_integer (&cint, VOIDmode, power, SIGNED);
1085 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1086 return power;
1088 CASE_FLT_FN (BUILT_IN_POWI):
1089 arg1 = gimple_call_arg (stmt, 1);
1090 power = TREE_INT_CST_LOW (arg1) - 1;
1091 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1092 return power;
1094 default:
1095 gcc_unreachable ();
1099 /* Find the single immediate use of STMT's LHS, and replace it
1100 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1101 replace *DEF with OP as well. */
1103 static void
1104 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1106 tree lhs;
1107 gimple *use_stmt;
1108 use_operand_p use;
1109 gimple_stmt_iterator gsi;
1111 if (is_gimple_call (stmt))
1112 lhs = gimple_call_lhs (stmt);
1113 else
1114 lhs = gimple_assign_lhs (stmt);
1116 gcc_assert (has_single_use (lhs));
1117 single_imm_use (lhs, &use, &use_stmt);
1118 if (lhs == *def)
1119 *def = op;
1120 SET_USE (use, op);
1121 if (TREE_CODE (op) != SSA_NAME)
1122 update_stmt (use_stmt);
1123 gsi = gsi_for_stmt (stmt);
1124 unlink_stmt_vdef (stmt);
1125 reassoc_remove_stmt (&gsi);
1126 release_defs (stmt);
1129 /* Walks the linear chain with result *DEF searching for an operation
1130 with operand OP and code OPCODE removing that from the chain. *DEF
1131 is updated if there is only one operand but no operation left. */
1133 static void
1134 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1136 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1138 do
1140 tree name;
1142 if (opcode == MULT_EXPR
1143 && stmt_is_power_of_op (stmt, op))
1145 if (decrement_power (stmt) == 1)
1146 propagate_op_to_single_use (op, stmt, def);
1147 return;
1150 name = gimple_assign_rhs1 (stmt);
1152 /* If this is the operation we look for and one of the operands
1153 is ours simply propagate the other operand into the stmts
1154 single use. */
1155 if (gimple_assign_rhs_code (stmt) == opcode
1156 && (name == op
1157 || gimple_assign_rhs2 (stmt) == op))
1159 if (name == op)
1160 name = gimple_assign_rhs2 (stmt);
1161 propagate_op_to_single_use (name, stmt, def);
1162 return;
1165 /* We might have a multiply of two __builtin_pow* calls, and
1166 the operand might be hiding in the rightmost one. */
1167 if (opcode == MULT_EXPR
1168 && gimple_assign_rhs_code (stmt) == opcode
1169 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1170 && has_single_use (gimple_assign_rhs2 (stmt)))
1172 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1173 if (stmt_is_power_of_op (stmt2, op))
1175 if (decrement_power (stmt2) == 1)
1176 propagate_op_to_single_use (op, stmt2, def);
1177 return;
1181 /* Continue walking the chain. */
1182 gcc_assert (name != op
1183 && TREE_CODE (name) == SSA_NAME);
1184 stmt = SSA_NAME_DEF_STMT (name);
1186 while (1);
1189 /* Returns true if statement S1 dominates statement S2. Like
1190 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1192 static bool
1193 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1195 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1197 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1198 SSA_NAME. Assume it lives at the beginning of the function and
1199 thus dominates everything. */
1200 if (!bb1 || s1 == s2)
1201 return true;
1203 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1204 if (!bb2)
1205 return false;
1207 if (bb1 == bb2)
1209 /* PHIs in the same basic block are assumed to be
1210 executed all in parallel, if only one stmt is a PHI,
1211 it dominates the other stmt in the same basic block. */
1212 if (gimple_code (s1) == GIMPLE_PHI)
1213 return true;
1215 if (gimple_code (s2) == GIMPLE_PHI)
1216 return false;
1218 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1220 if (gimple_uid (s1) < gimple_uid (s2))
1221 return true;
1223 if (gimple_uid (s1) > gimple_uid (s2))
1224 return false;
1226 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1227 unsigned int uid = gimple_uid (s1);
1228 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1230 gimple *s = gsi_stmt (gsi);
1231 if (gimple_uid (s) != uid)
1232 break;
1233 if (s == s2)
1234 return true;
1237 return false;
1240 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1243 /* Insert STMT after INSERT_POINT. */
1245 static void
1246 insert_stmt_after (gimple *stmt, gimple *insert_point)
1248 gimple_stmt_iterator gsi;
1249 basic_block bb;
1251 if (gimple_code (insert_point) == GIMPLE_PHI)
1252 bb = gimple_bb (insert_point);
1253 else if (!stmt_ends_bb_p (insert_point))
1255 gsi = gsi_for_stmt (insert_point);
1256 gimple_set_uid (stmt, gimple_uid (insert_point));
1257 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1258 return;
1260 else
1261 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1262 thus if it must end a basic block, it should be a call that can
1263 throw, or some assignment that can throw. If it throws, the LHS
1264 of it will not be initialized though, so only valid places using
1265 the SSA_NAME should be dominated by the fallthru edge. */
1266 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1267 gsi = gsi_after_labels (bb);
1268 if (gsi_end_p (gsi))
1270 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1271 gimple_set_uid (stmt,
1272 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1274 else
1275 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1276 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1279 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1280 the result. Places the statement after the definition of either
1281 OP1 or OP2. Returns the new statement. */
1283 static gimple *
1284 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1286 gimple *op1def = NULL, *op2def = NULL;
1287 gimple_stmt_iterator gsi;
1288 tree op;
1289 gassign *sum;
1291 /* Create the addition statement. */
1292 op = make_ssa_name (type);
1293 sum = gimple_build_assign (op, opcode, op1, op2);
1295 /* Find an insertion place and insert. */
1296 if (TREE_CODE (op1) == SSA_NAME)
1297 op1def = SSA_NAME_DEF_STMT (op1);
1298 if (TREE_CODE (op2) == SSA_NAME)
1299 op2def = SSA_NAME_DEF_STMT (op2);
1300 if ((!op1def || gimple_nop_p (op1def))
1301 && (!op2def || gimple_nop_p (op2def)))
1303 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1304 if (gsi_end_p (gsi))
1306 gimple_stmt_iterator gsi2
1307 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1308 gimple_set_uid (sum,
1309 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1311 else
1312 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1313 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1315 else
1317 gimple *insert_point;
1318 if ((!op1def || gimple_nop_p (op1def))
1319 || (op2def && !gimple_nop_p (op2def)
1320 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1321 insert_point = op2def;
1322 else
1323 insert_point = op1def;
1324 insert_stmt_after (sum, insert_point);
1326 update_stmt (sum);
1328 return sum;
1331 /* Perform un-distribution of divisions and multiplications.
1332 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1333 to (A + B) / X for real X.
1335 The algorithm is organized as follows.
1337 - First we walk the addition chain *OPS looking for summands that
1338 are defined by a multiplication or a real division. This results
1339 in the candidates bitmap with relevant indices into *OPS.
1341 - Second we build the chains of multiplications or divisions for
1342 these candidates, counting the number of occurrences of (operand, code)
1343 pairs in all of the candidates chains.
1345 - Third we sort the (operand, code) pairs by number of occurrence and
1346 process them starting with the pair with the most uses.
1348 * For each such pair we walk the candidates again to build a
1349 second candidate bitmap noting all multiplication/division chains
1350 that have at least one occurrence of (operand, code).
1352 * We build an alternate addition chain only covering these
1353 candidates with one (operand, code) operation removed from their
1354 multiplication/division chain.
1356 * The first candidate gets replaced by the alternate addition chain
1357 multiplied/divided by the operand.
1359 * All candidate chains get disabled for further processing and
1360 processing of (operand, code) pairs continues.
1362 The alternate addition chains built are re-processed by the main
1363 reassociation algorithm which allows optimizing a * x * y + b * y * x
1364 to (a + b ) * x * y in one invocation of the reassociation pass. */
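/* A short worked sketch of the algorithm above (illustrative only).
   For the addition chain a * x + b * x + c * y:

     - all three summands become candidates (each is a MULT_EXPR
       chain);
     - the counting table ends up with x -> 2 and y, a, b, c -> 1;
     - the most frequent pair (x, MULT_EXPR) selects the first two
       candidates, x is stripped from both chains, the alternate
       addition a + b is built, the first candidate is replaced by
       (a + b) * x and the second by the constant 0;

   so the list the caller keeps reassociating is (a + b) * x + 0
   + c * y, with the 0 cleaned up by the usual constant handling.  */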
1366 static bool
1367 undistribute_ops_list (enum tree_code opcode,
1368 vec<operand_entry_t> *ops, struct loop *loop)
1370 unsigned int length = ops->length ();
1371 operand_entry_t oe1;
1372 unsigned i, j;
1373 sbitmap candidates, candidates2;
1374 unsigned nr_candidates, nr_candidates2;
1375 sbitmap_iterator sbi0;
1376 vec<operand_entry_t> *subops;
1377 bool changed = false;
1378 int next_oecount_id = 0;
1380 if (length <= 1
1381 || opcode != PLUS_EXPR)
1382 return false;
1384 /* Build a list of candidates to process. */
1385 candidates = sbitmap_alloc (length);
1386 bitmap_clear (candidates);
1387 nr_candidates = 0;
1388 FOR_EACH_VEC_ELT (*ops, i, oe1)
1390 enum tree_code dcode;
1391 gimple *oe1def;
1393 if (TREE_CODE (oe1->op) != SSA_NAME)
1394 continue;
1395 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1396 if (!is_gimple_assign (oe1def))
1397 continue;
1398 dcode = gimple_assign_rhs_code (oe1def);
1399 if ((dcode != MULT_EXPR
1400 && dcode != RDIV_EXPR)
1401 || !is_reassociable_op (oe1def, dcode, loop))
1402 continue;
1404 bitmap_set_bit (candidates, i);
1405 nr_candidates++;
1408 if (nr_candidates < 2)
1410 sbitmap_free (candidates);
1411 return false;
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "searching for un-distribute opportunities ");
1417 print_generic_expr (dump_file,
1418 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1419 fprintf (dump_file, " %d\n", nr_candidates);
1422 /* Build linearized sub-operand lists and the counting table. */
1423 cvec.create (0);
1425 hash_table<oecount_hasher> ctable (15);
1427 /* ??? Macro arguments cannot have multi-argument template types in
1428 them. This typedef is needed to work around that limitation. */
1429 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1430 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1431 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1433 gimple *oedef;
1434 enum tree_code oecode;
1435 unsigned j;
1437 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1438 oecode = gimple_assign_rhs_code (oedef);
1439 linearize_expr_tree (&subops[i], oedef,
1440 associative_tree_code (oecode), false);
1442 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1444 oecount c;
1445 int *slot;
1446 int idx;
1447 c.oecode = oecode;
1448 c.cnt = 1;
1449 c.id = next_oecount_id++;
1450 c.op = oe1->op;
1451 cvec.safe_push (c);
1452 idx = cvec.length () + 41;
1453 slot = ctable.find_slot (idx, INSERT);
1454 if (!*slot)
1456 *slot = idx;
1458 else
1460 cvec.pop ();
1461 cvec[*slot - 42].cnt++;
1466 /* Sort the counting table. */
1467 cvec.qsort (oecount_cmp);
1469 if (dump_file && (dump_flags & TDF_DETAILS))
1471 oecount *c;
1472 fprintf (dump_file, "Candidates:\n");
1473 FOR_EACH_VEC_ELT (cvec, j, c)
1475 fprintf (dump_file, " %u %s: ", c->cnt,
1476 c->oecode == MULT_EXPR
1477 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1478 print_generic_expr (dump_file, c->op, 0);
1479 fprintf (dump_file, "\n");
1483 /* Process the (operand, code) pairs in order of most occurrence. */
1484 candidates2 = sbitmap_alloc (length);
1485 while (!cvec.is_empty ())
1487 oecount *c = &cvec.last ();
1488 if (c->cnt < 2)
1489 break;
1491 /* Now collect the operands in the outer chain that contain
1492 the common operand in their inner chain. */
1493 bitmap_clear (candidates2);
1494 nr_candidates2 = 0;
1495 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1497 gimple *oedef;
1498 enum tree_code oecode;
1499 unsigned j;
1500 tree op = (*ops)[i]->op;
1502 /* If we undistributed in this chain already this may be
1503 a constant. */
1504 if (TREE_CODE (op) != SSA_NAME)
1505 continue;
1507 oedef = SSA_NAME_DEF_STMT (op);
1508 oecode = gimple_assign_rhs_code (oedef);
1509 if (oecode != c->oecode)
1510 continue;
1512 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1514 if (oe1->op == c->op)
1516 bitmap_set_bit (candidates2, i);
1517 ++nr_candidates2;
1518 break;
1523 if (nr_candidates2 >= 2)
1525 operand_entry_t oe1, oe2;
1526 gimple *prod;
1527 int first = bitmap_first_set_bit (candidates2);
1529 /* Build the new addition chain. */
1530 oe1 = (*ops)[first];
1531 if (dump_file && (dump_flags & TDF_DETAILS))
1533 fprintf (dump_file, "Building (");
1534 print_generic_expr (dump_file, oe1->op, 0);
1536 zero_one_operation (&oe1->op, c->oecode, c->op);
1537 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1539 gimple *sum;
1540 oe2 = (*ops)[i];
1541 if (dump_file && (dump_flags & TDF_DETAILS))
1543 fprintf (dump_file, " + ");
1544 print_generic_expr (dump_file, oe2->op, 0);
1546 zero_one_operation (&oe2->op, c->oecode, c->op);
1547 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1548 oe1->op, oe2->op, opcode);
1549 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1550 oe2->rank = 0;
1551 oe1->op = gimple_get_lhs (sum);
1554 /* Apply the multiplication/division. */
1555 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1556 oe1->op, c->op, c->oecode);
1557 if (dump_file && (dump_flags & TDF_DETAILS))
1559 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1560 print_generic_expr (dump_file, c->op, 0);
1561 fprintf (dump_file, "\n");
1564 /* Record it in the addition chain and disable further
1565 undistribution with this op. */
1566 oe1->op = gimple_assign_lhs (prod);
1567 oe1->rank = get_rank (oe1->op);
1568 subops[first].release ();
1570 changed = true;
1573 cvec.pop ();
1576 for (i = 0; i < ops->length (); ++i)
1577 subops[i].release ();
1578 free (subops);
1579 cvec.release ();
1580 sbitmap_free (candidates);
1581 sbitmap_free (candidates2);
1583 return changed;
1586 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1587 expression, examine the other OPS to see if any of them are comparisons
1588 of the same values, which we may be able to combine or eliminate.
1589 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
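/* A couple more examples of what this enables (illustrative only):
   (a < b) & (a <= b) is narrowed to a < b, and (a < b) & (a > b)
   folds to the constant 0, in which case the constant is appended to
   the operand list rather than put in place of the current entry
   (see the INTEGER_CST handling below).  */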
1591 static bool
1592 eliminate_redundant_comparison (enum tree_code opcode,
1593 vec<operand_entry_t> *ops,
1594 unsigned int currindex,
1595 operand_entry_t curr)
1597 tree op1, op2;
1598 enum tree_code lcode, rcode;
1599 gimple *def1, *def2;
1600 int i;
1601 operand_entry_t oe;
1603 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1604 return false;
1606 /* Check that CURR is a comparison. */
1607 if (TREE_CODE (curr->op) != SSA_NAME)
1608 return false;
1609 def1 = SSA_NAME_DEF_STMT (curr->op);
1610 if (!is_gimple_assign (def1))
1611 return false;
1612 lcode = gimple_assign_rhs_code (def1);
1613 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1614 return false;
1615 op1 = gimple_assign_rhs1 (def1);
1616 op2 = gimple_assign_rhs2 (def1);
1618 /* Now look for a similar comparison in the remaining OPS. */
1619 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1621 tree t;
1623 if (TREE_CODE (oe->op) != SSA_NAME)
1624 continue;
1625 def2 = SSA_NAME_DEF_STMT (oe->op);
1626 if (!is_gimple_assign (def2))
1627 continue;
1628 rcode = gimple_assign_rhs_code (def2);
1629 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1630 continue;
1632 /* If we got here, we have a match. See if we can combine the
1633 two comparisons. */
1634 if (opcode == BIT_IOR_EXPR)
1635 t = maybe_fold_or_comparisons (lcode, op1, op2,
1636 rcode, gimple_assign_rhs1 (def2),
1637 gimple_assign_rhs2 (def2));
1638 else
1639 t = maybe_fold_and_comparisons (lcode, op1, op2,
1640 rcode, gimple_assign_rhs1 (def2),
1641 gimple_assign_rhs2 (def2));
1642 if (!t)
1643 continue;
1645 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1646 always give us a boolean_type_node value back. If the original
1647 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1648 we need to convert. */
1649 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1650 t = fold_convert (TREE_TYPE (curr->op), t);
1652 if (TREE_CODE (t) != INTEGER_CST
1653 && !operand_equal_p (t, curr->op, 0))
1655 enum tree_code subcode;
1656 tree newop1, newop2;
1657 if (!COMPARISON_CLASS_P (t))
1658 continue;
1659 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1660 STRIP_USELESS_TYPE_CONVERSION (newop1);
1661 STRIP_USELESS_TYPE_CONVERSION (newop2);
1662 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1663 continue;
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1668 fprintf (dump_file, "Equivalence: ");
1669 print_generic_expr (dump_file, curr->op, 0);
1670 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1671 print_generic_expr (dump_file, oe->op, 0);
1672 fprintf (dump_file, " -> ");
1673 print_generic_expr (dump_file, t, 0);
1674 fprintf (dump_file, "\n");
1677 /* Now we can delete oe, as it has been subsumed by the new combined
1678 expression t. */
1679 ops->ordered_remove (i);
1680 reassociate_stats.ops_eliminated ++;
1682 /* If t is the same as curr->op, we're done. Otherwise we must
1683 replace curr->op with t. Special case is if we got a constant
1684 back, in which case we add it to the end instead of in place of
1685 the current entry. */
1686 if (TREE_CODE (t) == INTEGER_CST)
1688 ops->ordered_remove (currindex);
1689 add_to_ops_vec (ops, t);
1691 else if (!operand_equal_p (t, curr->op, 0))
1693 gimple *sum;
1694 enum tree_code subcode;
1695 tree newop1;
1696 tree newop2;
1697 gcc_assert (COMPARISON_CLASS_P (t));
1698 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1699 STRIP_USELESS_TYPE_CONVERSION (newop1);
1700 STRIP_USELESS_TYPE_CONVERSION (newop2);
1701 gcc_checking_assert (is_gimple_val (newop1)
1702 && is_gimple_val (newop2));
1703 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1704 curr->op = gimple_get_lhs (sum);
1706 return true;
1709 return false;
1712 /* Perform various identities and other optimizations on the list of
1713 operand entries, stored in OPS. The tree code for the binary
1714 operation between all the operands is OPCODE. */
1716 static void
1717 optimize_ops_list (enum tree_code opcode,
1718 vec<operand_entry_t> *ops)
1720 unsigned int length = ops->length ();
1721 unsigned int i;
1722 operand_entry_t oe;
1723 operand_entry_t oelast = NULL;
1724 bool iterate = false;
1726 if (length == 1)
1727 return;
1729 oelast = ops->last ();
1731 /* If the last two are constants, pop the constants off, merge them
1732 and try the next two. */
1733 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1735 operand_entry_t oelm1 = (*ops)[length - 2];
1737 if (oelm1->rank == 0
1738 && is_gimple_min_invariant (oelm1->op)
1739 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1740 TREE_TYPE (oelast->op)))
1742 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1743 oelm1->op, oelast->op);
1745 if (folded && is_gimple_min_invariant (folded))
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1748 fprintf (dump_file, "Merging constants\n");
1750 ops->pop ();
1751 ops->pop ();
1753 add_to_ops_vec (ops, folded);
1754 reassociate_stats.constants_eliminated++;
1756 optimize_ops_list (opcode, ops);
1757 return;
1762 eliminate_using_constants (opcode, ops);
1763 oelast = NULL;
1765 for (i = 0; ops->iterate (i, &oe);)
1767 bool done = false;
1769 if (eliminate_not_pairs (opcode, ops, i, oe))
1770 return;
1771 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1772 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1773 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1775 if (done)
1776 return;
1777 iterate = true;
1778 oelast = NULL;
1779 continue;
1781 oelast = oe;
1782 i++;
1785 length = ops->length ();
1786 oelast = ops->last ();
1788 if (iterate)
1789 optimize_ops_list (opcode, ops);
1792 /* The following functions are subroutines to optimize_range_tests and allow
1793 it to try to change a logical combination of comparisons into a range
1794 test.
1796 For example, both
1797 X == 2 || X == 5 || X == 3 || X == 4
1798 and
1799 X >= 2 && X <= 5
1800 are converted to
1801 (unsigned) (X - 2) <= 3
1803 For more information see comments above fold_test_range in fold-const.c,
1804 this implementation is for GIMPLE. */
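/* Checking that example (illustrative only): for X in {2, 3, 4, 5}
   the value (unsigned) (X - 2) is 0, 1, 2 or 3 and the test holds;
   for X < 2 the unsigned subtraction wraps around to a very large
   value and for X > 5 it is at least 4, so the single comparison is
   equivalent to the original four equality tests.  */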
1806 struct range_entry
1808 tree exp;
1809 tree low;
1810 tree high;
1811 bool in_p;
1812 bool strict_overflow_p;
1813 unsigned int idx, next;
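/* For instance (illustrative only), the test x >= 2 && x <= 5 on an
   integral SSA_NAME x would be summarized as a range_entry with
   exp = x, low = 2, high = 5 and in_p = true; idx records where the
   corresponding entry sits in the operand vector being optimized.  */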
1816 /* This is similar to make_range in fold-const.c, but on top of
1817 GIMPLE instead of trees. If EXP is non-NULL, it should be
1818 an SSA_NAME and STMT argument is ignored, otherwise STMT
1819 argument should be a GIMPLE_COND. */
1821 static void
1822 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1824 int in_p;
1825 tree low, high;
1826 bool is_bool, strict_overflow_p;
1828 r->exp = NULL_TREE;
1829 r->in_p = false;
1830 r->strict_overflow_p = false;
1831 r->low = NULL_TREE;
1832 r->high = NULL_TREE;
1833 if (exp != NULL_TREE
1834 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1835 return;
1837 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1838 and see if we can refine the range. Some of the cases below may not
1839 happen, but it doesn't seem worth worrying about this. We "continue"
1840 the outer loop when we've changed something; otherwise we "break"
1841 the switch, which will "break" the while. */
1842 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1843 high = low;
1844 in_p = 0;
1845 strict_overflow_p = false;
1846 is_bool = false;
1847 if (exp == NULL_TREE)
1848 is_bool = true;
1849 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1851 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1852 is_bool = true;
1853 else
1854 return;
1856 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1857 is_bool = true;
1859 while (1)
1861 enum tree_code code;
1862 tree arg0, arg1, exp_type;
1863 tree nexp;
1864 location_t loc;
1866 if (exp != NULL_TREE)
1868 if (TREE_CODE (exp) != SSA_NAME
1869 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1870 break;
1872 stmt = SSA_NAME_DEF_STMT (exp);
1873 if (!is_gimple_assign (stmt))
1874 break;
1876 code = gimple_assign_rhs_code (stmt);
1877 arg0 = gimple_assign_rhs1 (stmt);
1878 arg1 = gimple_assign_rhs2 (stmt);
1879 exp_type = TREE_TYPE (exp);
1881 else
1883 code = gimple_cond_code (stmt);
1884 arg0 = gimple_cond_lhs (stmt);
1885 arg1 = gimple_cond_rhs (stmt);
1886 exp_type = boolean_type_node;
1889 if (TREE_CODE (arg0) != SSA_NAME)
1890 break;
1891 loc = gimple_location (stmt);
1892 switch (code)
1894 case BIT_NOT_EXPR:
1895 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1896 /* Ensure the range is either +[-,0], +[0,0],
1897 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1898 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1899 or similar expression of unconditional true or
1900 false, it should not be negated. */
1901 && ((high && integer_zerop (high))
1902 || (low && integer_onep (low))))
1904 in_p = !in_p;
1905 exp = arg0;
1906 continue;
1908 break;
1909 case SSA_NAME:
1910 exp = arg0;
1911 continue;
1912 CASE_CONVERT:
1913 if (is_bool)
1914 goto do_default;
1915 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1917 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1918 is_bool = true;
1919 else
1920 return;
1922 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1923 is_bool = true;
1924 goto do_default;
1925 case EQ_EXPR:
1926 case NE_EXPR:
1927 case LT_EXPR:
1928 case LE_EXPR:
1929 case GE_EXPR:
1930 case GT_EXPR:
1931 is_bool = true;
1932 /* FALLTHRU */
1933 default:
1934 if (!is_bool)
1935 return;
1936 do_default:
1937 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1938 &low, &high, &in_p,
1939 &strict_overflow_p);
1940 if (nexp != NULL_TREE)
1942 exp = nexp;
1943 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1944 continue;
1946 break;
1948 break;
1950 if (is_bool)
1952 r->exp = exp;
1953 r->in_p = in_p;
1954 r->low = low;
1955 r->high = high;
1956 r->strict_overflow_p = strict_overflow_p;
1960 /* Comparison function for qsort. Sort entries
1961 without SSA_NAME exp first, then with SSA_NAMEs sorted
1962 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1963 by increasing ->low and if ->low is the same, by increasing
1964 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1965 maximum. */
1967 static int
1968 range_entry_cmp (const void *a, const void *b)
1970 const struct range_entry *p = (const struct range_entry *) a;
1971 const struct range_entry *q = (const struct range_entry *) b;
1973 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1975 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1977 /* Group range_entries for the same SSA_NAME together. */
1978 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1979 return -1;
1980 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1981 return 1;
1982 /* If ->low is different, NULL low goes first, then by
1983 ascending low. */
1984 if (p->low != NULL_TREE)
1986 if (q->low != NULL_TREE)
1988 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1989 p->low, q->low);
1990 if (tem && integer_onep (tem))
1991 return -1;
1992 tem = fold_binary (GT_EXPR, boolean_type_node,
1993 p->low, q->low);
1994 if (tem && integer_onep (tem))
1995 return 1;
1997 else
1998 return 1;
2000 else if (q->low != NULL_TREE)
2001 return -1;
2002 /* If ->high is different, NULL high goes last, before that by
2003 ascending high. */
2004 if (p->high != NULL_TREE)
2006 if (q->high != NULL_TREE)
2008 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2009 p->high, q->high);
2010 if (tem && integer_onep (tem))
2011 return -1;
2012 tem = fold_binary (GT_EXPR, boolean_type_node,
2013 p->high, q->high);
2014 if (tem && integer_onep (tem))
2015 return 1;
2017 else
2018 return -1;
2020 else if (q->high != NULL_TREE)
2021 return 1;
2022 /* If both ranges are the same, sort below by ascending idx. */
2024 else
2025 return 1;
2027 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2028 return -1;
2030 if (p->idx < q->idx)
2031 return -1;
2032 else
2034 gcc_checking_assert (p->idx > q->idx);
2035 return 1;
2039 /* Helper routine of optimize_range_test.
2040 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2041 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2042 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2043 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2044 an array of COUNT pointers to other ranges. Return
2045 true if the range merge has been successful.
2046 If OPCODE is ERROR_MARK, this is called from within
2047 maybe_optimize_range_tests and is performing inter-bb range optimization.
2048 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2049 oe->rank. */
2051 static bool
2052 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2053 struct range_entry **otherrangep,
2054 unsigned int count, enum tree_code opcode,
2055 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2056 bool in_p, tree low, tree high, bool strict_overflow_p)
2058 operand_entry_t oe = (*ops)[range->idx];
2059 tree op = oe->op;
2060 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) :
2061 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2062 location_t loc = gimple_location (stmt);
2063 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2064 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2065 in_p, low, high);
2066 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2067 gimple_stmt_iterator gsi;
2068 unsigned int i;
2070 if (tem == NULL_TREE)
2071 return false;
2073 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2074 warning_at (loc, OPT_Wstrict_overflow,
2075 "assuming signed overflow does not occur "
2076 "when simplifying range test");
2078 if (dump_file && (dump_flags & TDF_DETAILS))
2080 struct range_entry *r;
2081 fprintf (dump_file, "Optimizing range tests ");
2082 print_generic_expr (dump_file, range->exp, 0);
2083 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2084 print_generic_expr (dump_file, range->low, 0);
2085 fprintf (dump_file, ", ");
2086 print_generic_expr (dump_file, range->high, 0);
2087 fprintf (dump_file, "]");
2088 for (i = 0; i < count; i++)
2090 if (otherrange)
2091 r = otherrange + i;
2092 else
2093 r = otherrangep[i];
2094 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2095 print_generic_expr (dump_file, r->low, 0);
2096 fprintf (dump_file, ", ");
2097 print_generic_expr (dump_file, r->high, 0);
2098 fprintf (dump_file, "]");
2100 fprintf (dump_file, "\n into ");
2101 print_generic_expr (dump_file, tem, 0);
2102 fprintf (dump_file, "\n");
2105 if (opcode == BIT_IOR_EXPR
2106 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2107 tem = invert_truthvalue_loc (loc, tem);
2109 tem = fold_convert_loc (loc, optype, tem);
2110 gsi = gsi_for_stmt (stmt);
2111 unsigned int uid = gimple_uid (stmt);
2112 /* In rare cases range->exp can be equal to lhs of stmt.
2113 In that case we have to insert after the stmt rather than before
2114 it. If stmt is a PHI, insert it at the start of the basic block. */
2115 if (op != range->exp)
2117 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2118 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2119 GSI_SAME_STMT);
2120 gsi_prev (&gsi);
2122 else if (gimple_code (stmt) != GIMPLE_PHI)
2124 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2125 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2126 GSI_CONTINUE_LINKING);
2128 else
2130 gsi = gsi_after_labels (gimple_bb (stmt));
2131 if (!gsi_end_p (gsi))
2132 uid = gimple_uid (gsi_stmt (gsi));
2133 else
2135 gsi = gsi_start_bb (gimple_bb (stmt));
2136 uid = 1;
2137 while (!gsi_end_p (gsi))
2139 uid = gimple_uid (gsi_stmt (gsi));
2140 gsi_next (&gsi);
2143 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2144 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2145 GSI_SAME_STMT);
2146 if (gsi_end_p (gsi))
2147 gsi = gsi_last_bb (gimple_bb (stmt));
2148 else
2149 gsi_prev (&gsi);
2151 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2152 if (gimple_uid (gsi_stmt (gsi)))
2153 break;
2154 else
2155 gimple_set_uid (gsi_stmt (gsi), uid);
2157 oe->op = tem;
2158 range->exp = exp;
2159 range->low = low;
2160 range->high = high;
2161 range->in_p = in_p;
2162 range->strict_overflow_p = false;
2164 for (i = 0; i < count; i++)
2166 if (otherrange)
2167 range = otherrange + i;
2168 else
2169 range = otherrangep[i];
2170 oe = (*ops)[range->idx];
2171 /* Now change all the other range test immediate uses, so that
2172 those tests will be optimized away. */
2173 if (opcode == ERROR_MARK)
2175 if (oe->op)
2176 oe->op = build_int_cst (TREE_TYPE (oe->op),
2177 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2178 else
2179 oe->op = (oe->rank == BIT_IOR_EXPR
2180 ? boolean_false_node : boolean_true_node);
2182 else
2183 oe->op = error_mark_node;
2184 range->exp = NULL_TREE;
2186 return true;
2189 /* Optimize X == CST1 || X == CST2
2190 if popcount (CST1 ^ CST2) == 1 into
2191 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2192 Similarly for ranges. E.g.
2193 X != 2 && X != 3 && X != 10 && X != 11
2194 will be transformed by the previous optimization into
2195 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2196 and this loop can transform that into
2197 !(((X & ~8) - 2U) <= 1U). */
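/* A worked instance of the single-comparison case (illustrative
values): for X == 4 || X == 6 we have CST1 ^ CST2 == 2, whose
popcount is 1, so the test becomes (X & ~2) == 4, which accepts
exactly the values 4 and 6.  */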
2199 static bool
2200 optimize_range_tests_xor (enum tree_code opcode, tree type,
2201 tree lowi, tree lowj, tree highi, tree highj,
2202 vec<operand_entry_t> *ops,
2203 struct range_entry *rangei,
2204 struct range_entry *rangej)
2206 tree lowxor, highxor, tem, exp;
2207 /* Check lowi ^ lowj == highi ^ highj and
2208 popcount (lowi ^ lowj) == 1. */
2209 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2210 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2211 return false;
2212 if (!integer_pow2p (lowxor))
2213 return false;
2214 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2215 if (!tree_int_cst_equal (lowxor, highxor))
2216 return false;
2218 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2219 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2220 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2221 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2222 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2223 NULL, rangei->in_p, lowj, highj,
2224 rangei->strict_overflow_p
2225 || rangej->strict_overflow_p))
2226 return true;
2227 return false;
2230 /* Optimize X == CST1 || X == CST2
2231 if popcount (CST2 - CST1) == 1 into
2232 ((X - CST1) & ~(CST2 - CST1)) == 0.
2233 Similarly for ranges. E.g.
2234 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2235 || X == 75 || X == 45
2236 will be transformed by the previous optimization into
2237 (X - 43U) <= 3U || (X - 75U) <= 3U
2238 and this loop can transform that into
2239 ((X - 43U) & ~(75U - 43U)) <= 3U. */
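/* A worked instance of the simplest case (illustrative values):
for X == 5 || X == 13 we have CST2 - CST1 == 8, whose popcount
is 1, so the test becomes ((X - 5) & ~8) == 0, which accepts
exactly the values 5 and 13.  */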
2240 static bool
2241 optimize_range_tests_diff (enum tree_code opcode, tree type,
2242 tree lowi, tree lowj, tree highi, tree highj,
2243 vec<operand_entry_t> *ops,
2244 struct range_entry *rangei,
2245 struct range_entry *rangej)
2247 tree tem1, tem2, mask;
2248 /* Check highi - lowi == highj - lowj. */
2249 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2250 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2251 return false;
2252 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2253 if (!tree_int_cst_equal (tem1, tem2))
2254 return false;
2255 /* Check popcount (lowj - lowi) == 1. */
2256 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2257 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2258 return false;
2259 if (!integer_pow2p (tem1))
2260 return false;
2262 type = unsigned_type_for (type);
2263 tem1 = fold_convert (type, tem1);
2264 tem2 = fold_convert (type, tem2);
2265 lowi = fold_convert (type, lowi);
2266 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2267 tem1 = fold_binary (MINUS_EXPR, type,
2268 fold_convert (type, rangei->exp), lowi);
2269 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2270 lowj = build_int_cst (type, 0);
2271 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2272 NULL, rangei->in_p, lowj, tem2,
2273 rangei->strict_overflow_p
2274 || rangej->strict_overflow_p))
2275 return true;
2276 return false;
2279 /* Perform some common checks shared by optimize_range_tests_xor and
2280 optimize_range_tests_diff.
2281 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2282 otherwise call optimize_range_tests_diff. */
2284 static bool
2285 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2286 bool optimize_xor, vec<operand_entry_t> *ops,
2287 struct range_entry *ranges)
2289 int i, j;
2290 bool any_changes = false;
2291 for (i = first; i < length; i++)
2293 tree lowi, highi, lowj, highj, type, tem;
2295 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2296 continue;
2297 type = TREE_TYPE (ranges[i].exp);
2298 if (!INTEGRAL_TYPE_P (type))
2299 continue;
2300 lowi = ranges[i].low;
2301 if (lowi == NULL_TREE)
2302 lowi = TYPE_MIN_VALUE (type);
2303 highi = ranges[i].high;
2304 if (highi == NULL_TREE)
2305 continue;
2306 for (j = i + 1; j < length && j < i + 64; j++)
2308 bool changes;
2309 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2310 continue;
2311 lowj = ranges[j].low;
2312 if (lowj == NULL_TREE)
2313 continue;
2314 highj = ranges[j].high;
2315 if (highj == NULL_TREE)
2316 highj = TYPE_MAX_VALUE (type);
2317 /* Check lowj > highi. */
2318 tem = fold_binary (GT_EXPR, boolean_type_node,
2319 lowj, highi);
2320 if (tem == NULL_TREE || !integer_onep (tem))
2321 continue;
2322 if (optimize_xor)
2323 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2324 highi, highj, ops,
2325 ranges + i, ranges + j);
2326 else
2327 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2328 highi, highj, ops,
2329 ranges + i, ranges + j);
2330 if (changes)
2332 any_changes = true;
2333 break;
2337 return any_changes;
2340 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2341 range, EXP, LOW, HIGH, compute the bit mask of bits to test and return
2342 EXP on success, NULL otherwise. */
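/* Illustrative example: with PREC 64, TOTALLOW 43, LOW 75 and
HIGH 78, HIGH - LOW is 3, so the initial mask covers 4 bits
(0xf); it is then shifted left by LOW - TOTALLOW == 32, giving
a final mask of 0xf00000000.  */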
2344 static tree
2345 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2346 wide_int *mask, tree *totallowp)
2348 tree tem = int_const_binop (MINUS_EXPR, high, low);
2349 if (tem == NULL_TREE
2350 || TREE_CODE (tem) != INTEGER_CST
2351 || TREE_OVERFLOW (tem)
2352 || tree_int_cst_sgn (tem) == -1
2353 || compare_tree_int (tem, prec) != -1)
2354 return NULL_TREE;
2356 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2357 *mask = wi::shifted_mask (0, max, false, prec);
2358 if (TREE_CODE (exp) == BIT_AND_EXPR
2359 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2361 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2362 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2363 if (wi::popcount (msk) == 1
2364 && wi::ltu_p (msk, prec - max))
2366 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2367 max += msk.to_uhwi ();
2368 exp = TREE_OPERAND (exp, 0);
2369 if (integer_zerop (low)
2370 && TREE_CODE (exp) == PLUS_EXPR
2371 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2373 tree ret = TREE_OPERAND (exp, 0);
2374 STRIP_NOPS (ret);
2375 widest_int bias
2376 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2377 TYPE_PRECISION (TREE_TYPE (low))));
2378 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2379 if (totallowp)
2381 *totallowp = tbias;
2382 return ret;
2384 else if (!tree_int_cst_lt (totallow, tbias))
2385 return NULL_TREE;
2386 bias = wi::to_widest (tbias);
2387 bias -= wi::to_widest (totallow);
2388 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2390 *mask = wi::lshift (*mask, bias);
2391 return ret;
2396 if (totallowp)
2397 return exp;
2398 if (!tree_int_cst_lt (totallow, low))
2399 return exp;
2400 tem = int_const_binop (MINUS_EXPR, low, totallow);
2401 if (tem == NULL_TREE
2402 || TREE_CODE (tem) != INTEGER_CST
2403 || TREE_OVERFLOW (tem)
2404 || compare_tree_int (tem, prec - max) == 1)
2405 return NULL_TREE;
2407 *mask = wi::lshift (*mask, wi::to_widest (tem));
2408 return exp;
2411 /* Attempt to optimize small range tests using bit test.
2412 E.g.
2413 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2414 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2415 has been optimized by earlier optimizations into:
2416 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2417 As the whole 43 through 82 range spans fewer than 64 values,
2418 for 64-bit word targets optimize that into:
2419 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
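/* In the example above the mask 0x8F0000004FULL has bits 0-3, 6,
32-35 and 39 set, i.e. exactly the values 43-46, 49, 75-78 and
82 after subtracting the common base 43.  */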
2421 static bool
2422 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2423 vec<operand_entry_t> *ops,
2424 struct range_entry *ranges)
2426 int i, j;
2427 bool any_changes = false;
2428 int prec = GET_MODE_BITSIZE (word_mode);
2429 auto_vec<struct range_entry *, 64> candidates;
2431 for (i = first; i < length - 2; i++)
2433 tree lowi, highi, lowj, highj, type;
2435 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2436 continue;
2437 type = TREE_TYPE (ranges[i].exp);
2438 if (!INTEGRAL_TYPE_P (type))
2439 continue;
2440 lowi = ranges[i].low;
2441 if (lowi == NULL_TREE)
2442 lowi = TYPE_MIN_VALUE (type);
2443 highi = ranges[i].high;
2444 if (highi == NULL_TREE)
2445 continue;
2446 wide_int mask;
2447 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2448 highi, &mask, &lowi);
2449 if (exp == NULL_TREE)
2450 continue;
2451 bool strict_overflow_p = ranges[i].strict_overflow_p;
2452 candidates.truncate (0);
2453 int end = MIN (i + 64, length);
2454 for (j = i + 1; j < end; j++)
2456 tree exp2;
2457 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2458 continue;
2459 if (ranges[j].exp == exp)
2461 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2463 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2464 if (exp2 == exp)
2466 else if (TREE_CODE (exp2) == PLUS_EXPR)
2468 exp2 = TREE_OPERAND (exp2, 0);
2469 STRIP_NOPS (exp2);
2470 if (exp2 != exp)
2471 continue;
2473 else
2474 continue;
2476 else
2477 continue;
2478 lowj = ranges[j].low;
2479 if (lowj == NULL_TREE)
2480 continue;
2481 highj = ranges[j].high;
2482 if (highj == NULL_TREE)
2483 highj = TYPE_MAX_VALUE (type);
2484 wide_int mask2;
2485 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2486 highj, &mask2, NULL);
2487 if (exp2 != exp)
2488 continue;
2489 mask |= mask2;
2490 strict_overflow_p |= ranges[j].strict_overflow_p;
2491 candidates.safe_push (&ranges[j]);
2494 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2495 if (candidates.length () >= 2)
2497 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2498 wi::to_widest (lowi)
2499 + prec - 1 - wi::clz (mask));
2500 operand_entry_t oe = (*ops)[ranges[i].idx];
2501 tree op = oe->op;
2502 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2503 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2504 location_t loc = gimple_location (stmt);
2505 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2507 /* See if it isn't cheaper to pretend the minimum value of the
2508 range is 0, if the maximum value is small enough.
2509 We can then avoid the subtraction of the minimum value, but the
2510 mask constant could perhaps be more expensive. */
2511 if (compare_tree_int (lowi, 0) > 0
2512 && compare_tree_int (high, prec) < 0)
2514 int cost_diff;
2515 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2516 rtx reg = gen_raw_REG (word_mode, 10000);
2517 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2518 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2519 GEN_INT (-m)), speed_p);
2520 rtx r = immed_wide_int_const (mask, word_mode);
2521 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2522 word_mode, speed_p);
2523 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2524 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2525 word_mode, speed_p);
2526 if (cost_diff > 0)
2528 mask = wi::lshift (mask, m);
2529 lowi = build_zero_cst (TREE_TYPE (lowi));
2533 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2534 false, lowi, high);
2535 if (tem == NULL_TREE || is_gimple_val (tem))
2536 continue;
2537 tree etype = unsigned_type_for (TREE_TYPE (exp));
2538 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2539 fold_convert_loc (loc, etype, exp),
2540 fold_convert_loc (loc, etype, lowi));
2541 exp = fold_convert_loc (loc, integer_type_node, exp);
2542 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2543 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2544 build_int_cst (word_type, 1), exp);
2545 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2546 wide_int_to_tree (word_type, mask));
2547 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2548 build_zero_cst (word_type));
2549 if (is_gimple_val (exp))
2550 continue;
2552 /* The shift might have undefined behavior if TEM is true,
2553 but reassociate_bb isn't prepared to have basic blocks
2554 split when it is running. So, temporarily emit code
2555 with BIT_IOR_EXPR instead of &&, and fix it up in
2556 branch_fixup. */
2557 gimple_seq seq;
2558 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2559 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2560 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2561 gimple_seq seq2;
2562 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2563 gimple_seq_add_seq_without_update (&seq, seq2);
2564 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2565 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2566 gimple *g = gimple_build_assign (make_ssa_name (optype),
2567 BIT_IOR_EXPR, tem, exp);
2568 gimple_set_location (g, loc);
2569 gimple_seq_add_stmt_without_update (&seq, g);
2570 exp = gimple_assign_lhs (g);
2571 tree val = build_zero_cst (optype);
2572 if (update_range_test (&ranges[i], NULL, candidates.address (),
2573 candidates.length (), opcode, ops, exp,
2574 seq, false, val, val, strict_overflow_p))
2576 any_changes = true;
2577 reassoc_branch_fixups.safe_push (tem);
2579 else
2580 gimple_seq_discard (seq);
2583 return any_changes;
2586 /* Optimize range tests, similarly to how fold_range_test optimizes
2587 it on trees. The tree code for the binary
2588 operation between all the operands is OPCODE.
2589 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2590 maybe_optimize_range_tests for inter-bb range optimization.
2591 In that case if oe->op is NULL, oe->id is bb->index whose
2592 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2593 the actual opcode. */
2595 static bool
2596 optimize_range_tests (enum tree_code opcode,
2597 vec<operand_entry_t> *ops)
2599 unsigned int length = ops->length (), i, j, first;
2600 operand_entry_t oe;
2601 struct range_entry *ranges;
2602 bool any_changes = false;
2604 if (length == 1)
2605 return false;
2607 ranges = XNEWVEC (struct range_entry, length);
2608 for (i = 0; i < length; i++)
2610 oe = (*ops)[i];
2611 ranges[i].idx = i;
2612 init_range_entry (ranges + i, oe->op,
2613 oe->op ? NULL :
2614 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2615 /* For | invert it now, we will invert it again before emitting
2616 the optimized expression. */
2617 if (opcode == BIT_IOR_EXPR
2618 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2619 ranges[i].in_p = !ranges[i].in_p;
2622 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2623 for (i = 0; i < length; i++)
2624 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2625 break;
2627 /* Try to merge ranges. */
2628 for (first = i; i < length; i++)
2630 tree low = ranges[i].low;
2631 tree high = ranges[i].high;
2632 int in_p = ranges[i].in_p;
2633 bool strict_overflow_p = ranges[i].strict_overflow_p;
2634 int update_fail_count = 0;
2636 for (j = i + 1; j < length; j++)
2638 if (ranges[i].exp != ranges[j].exp)
2639 break;
2640 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2641 ranges[j].in_p, ranges[j].low, ranges[j].high))
2642 break;
2643 strict_overflow_p |= ranges[j].strict_overflow_p;
2646 if (j == i + 1)
2647 continue;
2649 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2650 opcode, ops, ranges[i].exp, NULL, in_p,
2651 low, high, strict_overflow_p))
2653 i = j - 1;
2654 any_changes = true;
2656 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2657 while update_range_test would fail. */
2658 else if (update_fail_count == 64)
2659 i = j - 1;
2660 else
2661 ++update_fail_count;
2664 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2665 ops, ranges);
2667 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2668 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2669 ops, ranges);
2670 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2671 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2672 ops, ranges);
2674 if (any_changes && opcode != ERROR_MARK)
2676 j = 0;
2677 FOR_EACH_VEC_ELT (*ops, i, oe)
2679 if (oe->op == error_mark_node)
2680 continue;
2681 else if (i != j)
2682 (*ops)[j] = oe;
2683 j++;
2685 ops->truncate (j);
2688 XDELETEVEC (ranges);
2689 return any_changes;
2692 /* Return true if STMT is a cast like:
2693 <bb N>:
2695 _123 = (int) _234;
2697 <bb M>:
2698 # _345 = PHI <_123(N), 1(...), 1(...)>
2699 where _234 has bool type, _123 has a single use and
2700 bb N has a single successor M. This is commonly used in
2701 the last block of a range test. */
2703 static bool
2704 final_range_test_p (gimple *stmt)
2706 basic_block bb, rhs_bb;
2707 edge e;
2708 tree lhs, rhs;
2709 use_operand_p use_p;
2710 gimple *use_stmt;
2712 if (!gimple_assign_cast_p (stmt))
2713 return false;
2714 bb = gimple_bb (stmt);
2715 if (!single_succ_p (bb))
2716 return false;
2717 e = single_succ_edge (bb);
2718 if (e->flags & EDGE_COMPLEX)
2719 return false;
2721 lhs = gimple_assign_lhs (stmt);
2722 rhs = gimple_assign_rhs1 (stmt);
2723 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2724 || TREE_CODE (rhs) != SSA_NAME
2725 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2726 return false;
2728 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2729 if (!single_imm_use (lhs, &use_p, &use_stmt))
2730 return false;
2732 if (gimple_code (use_stmt) != GIMPLE_PHI
2733 || gimple_bb (use_stmt) != e->dest)
2734 return false;
2736 /* And that the rhs is defined in the same loop. */
2737 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2738 if (rhs_bb == NULL
2739 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2740 return false;
2742 return true;
2745 /* Return true if BB is a suitable basic block for inter-bb range test
2746 optimization. If BACKWARD is true, BB should be the only predecessor
2747 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2748 or compared with to find a common basic block to which all conditions
2749 branch if true resp. false. If BACKWARD is false, TEST_BB should
2750 be the only predecessor of BB. */
2752 static bool
2753 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2754 bool backward)
2756 edge_iterator ei, ei2;
2757 edge e, e2;
2758 gimple *stmt;
2759 gphi_iterator gsi;
2760 bool other_edge_seen = false;
2761 bool is_cond;
2763 if (test_bb == bb)
2764 return false;
2765 /* Check last stmt first. */
2766 stmt = last_stmt (bb);
2767 if (stmt == NULL
2768 || (gimple_code (stmt) != GIMPLE_COND
2769 && (backward || !final_range_test_p (stmt)))
2770 || gimple_visited_p (stmt)
2771 || stmt_could_throw_p (stmt)
2772 || *other_bb == bb)
2773 return false;
2774 is_cond = gimple_code (stmt) == GIMPLE_COND;
2775 if (is_cond)
2777 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2778 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2779 to *OTHER_BB (if not set yet, try to find it out). */
2780 if (EDGE_COUNT (bb->succs) != 2)
2781 return false;
2782 FOR_EACH_EDGE (e, ei, bb->succs)
2784 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2785 return false;
2786 if (e->dest == test_bb)
2788 if (backward)
2789 continue;
2790 else
2791 return false;
2793 if (e->dest == bb)
2794 return false;
2795 if (*other_bb == NULL)
2797 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2798 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2799 return false;
2800 else if (e->dest == e2->dest)
2801 *other_bb = e->dest;
2802 if (*other_bb == NULL)
2803 return false;
2805 if (e->dest == *other_bb)
2806 other_edge_seen = true;
2807 else if (backward)
2808 return false;
2810 if (*other_bb == NULL || !other_edge_seen)
2811 return false;
2813 else if (single_succ (bb) != *other_bb)
2814 return false;
2816 /* Now check all PHIs of *OTHER_BB. */
2817 e = find_edge (bb, *other_bb);
2818 e2 = find_edge (test_bb, *other_bb);
2819 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2821 gphi *phi = gsi.phi ();
2822 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2823 corresponding to BB and TEST_BB predecessor must be the same. */
2824 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2825 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2827 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2828 one of the PHIs should have the lhs of the last stmt in
2829 that block as PHI arg and that PHI should have 0 or 1
2830 corresponding to it in all other range test basic blocks
2831 considered. */
2832 if (!is_cond)
2834 if (gimple_phi_arg_def (phi, e->dest_idx)
2835 == gimple_assign_lhs (stmt)
2836 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2837 || integer_onep (gimple_phi_arg_def (phi,
2838 e2->dest_idx))))
2839 continue;
2841 else
2843 gimple *test_last = last_stmt (test_bb);
2844 if (gimple_code (test_last) != GIMPLE_COND
2845 && gimple_phi_arg_def (phi, e2->dest_idx)
2846 == gimple_assign_lhs (test_last)
2847 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2848 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2849 continue;
2852 return false;
2855 return true;
2858 /* Return true if BB doesn't have side-effects that would disallow
2859 range test optimization, all SSA_NAMEs set in the bb are consumed
2860 in the bb and there are no PHIs. */
2862 static bool
2863 no_side_effect_bb (basic_block bb)
2865 gimple_stmt_iterator gsi;
2866 gimple *last;
2868 if (!gimple_seq_empty_p (phi_nodes (bb)))
2869 return false;
2870 last = last_stmt (bb);
2871 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2873 gimple *stmt = gsi_stmt (gsi);
2874 tree lhs;
2875 imm_use_iterator imm_iter;
2876 use_operand_p use_p;
2878 if (is_gimple_debug (stmt))
2879 continue;
2880 if (gimple_has_side_effects (stmt))
2881 return false;
2882 if (stmt == last)
2883 return true;
2884 if (!is_gimple_assign (stmt))
2885 return false;
2886 lhs = gimple_assign_lhs (stmt);
2887 if (TREE_CODE (lhs) != SSA_NAME)
2888 return false;
2889 if (gimple_assign_rhs_could_trap_p (stmt))
2890 return false;
2891 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2893 gimple *use_stmt = USE_STMT (use_p);
2894 if (is_gimple_debug (use_stmt))
2895 continue;
2896 if (gimple_bb (use_stmt) != bb)
2897 return false;
2900 return false;
2903 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2904 return true and fill in *OPS recursively. */
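/* E.g. (SSA names purely illustrative): if VAR is _5 with
_5 = _1 | _2 and _1 = _3 | _4, both statements reassociable
BIT_IOR_EXPRs, and _2, _3 and _4 are single-use leaves, then
_3, _4 and _2 get pushed into *OPS while _1 does not.  */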
2906 static bool
2907 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2908 struct loop *loop)
2910 gimple *stmt = SSA_NAME_DEF_STMT (var);
2911 tree rhs[2];
2912 int i;
2914 if (!is_reassociable_op (stmt, code, loop))
2915 return false;
2917 rhs[0] = gimple_assign_rhs1 (stmt);
2918 rhs[1] = gimple_assign_rhs2 (stmt);
2919 gimple_set_visited (stmt, true);
2920 for (i = 0; i < 2; i++)
2921 if (TREE_CODE (rhs[i]) == SSA_NAME
2922 && !get_ops (rhs[i], code, ops, loop)
2923 && has_single_use (rhs[i]))
2925 operand_entry_t oe = operand_entry_pool.allocate ();
2927 oe->op = rhs[i];
2928 oe->rank = code;
2929 oe->id = 0;
2930 oe->count = 1;
2931 ops->safe_push (oe);
2933 return true;
2936 /* Find the ops that were added by get_ops starting from VAR, see if
2937 they were changed during update_range_test and if yes, create new
2938 stmts. */
2940 static tree
2941 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2942 unsigned int *pidx, struct loop *loop)
2944 gimple *stmt = SSA_NAME_DEF_STMT (var);
2945 tree rhs[4];
2946 int i;
2948 if (!is_reassociable_op (stmt, code, loop))
2949 return NULL;
2951 rhs[0] = gimple_assign_rhs1 (stmt);
2952 rhs[1] = gimple_assign_rhs2 (stmt);
2953 rhs[2] = rhs[0];
2954 rhs[3] = rhs[1];
2955 for (i = 0; i < 2; i++)
2956 if (TREE_CODE (rhs[i]) == SSA_NAME)
2958 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2959 if (rhs[2 + i] == NULL_TREE)
2961 if (has_single_use (rhs[i]))
2962 rhs[2 + i] = ops[(*pidx)++]->op;
2963 else
2964 rhs[2 + i] = rhs[i];
2967 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2968 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2970 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2971 var = make_ssa_name (TREE_TYPE (var));
2972 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2973 rhs[2], rhs[3]);
2974 gimple_set_uid (g, gimple_uid (stmt));
2975 gimple_set_visited (g, true);
2976 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2978 return var;
2981 /* Structure to track the initial value passed to get_ops and
2982 the range in the ops vector for each basic block. */
2984 struct inter_bb_range_test_entry
2986 tree op;
2987 unsigned int first_idx, last_idx;
2990 /* Inter-bb range test optimization. */
2992 static void
2993 maybe_optimize_range_tests (gimple *stmt)
2995 basic_block first_bb = gimple_bb (stmt);
2996 basic_block last_bb = first_bb;
2997 basic_block other_bb = NULL;
2998 basic_block bb;
2999 edge_iterator ei;
3000 edge e;
3001 auto_vec<operand_entry_t> ops;
3002 auto_vec<inter_bb_range_test_entry> bbinfo;
3003 bool any_changes = false;
3005 /* Consider only basic blocks that end with GIMPLE_COND or
3006 a cast statement satisfying final_range_test_p. All
3007 but the last bb in the first_bb .. last_bb range
3008 should end with GIMPLE_COND. */
3009 if (gimple_code (stmt) == GIMPLE_COND)
3011 if (EDGE_COUNT (first_bb->succs) != 2)
3012 return;
3014 else if (final_range_test_p (stmt))
3015 other_bb = single_succ (first_bb);
3016 else
3017 return;
3019 if (stmt_could_throw_p (stmt))
3020 return;
3022 /* As relative ordering of post-dominator sons isn't fixed,
3023 maybe_optimize_range_tests can be called first on any
3024 bb in the range we want to optimize. So, start searching
3025 backwards, if first_bb can be set to a predecessor. */
3026 while (single_pred_p (first_bb))
3028 basic_block pred_bb = single_pred (first_bb);
3029 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3030 break;
3031 if (!no_side_effect_bb (first_bb))
3032 break;
3033 first_bb = pred_bb;
3035 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3036 Before starting forward search in last_bb successors, find
3037 out the other_bb. */
3038 if (first_bb == last_bb)
3040 other_bb = NULL;
3041 /* As non-GIMPLE_COND last stmt always terminates the range,
3042 if forward search didn't discover anything, just give up. */
3043 if (gimple_code (stmt) != GIMPLE_COND)
3044 return;
3045 /* Look at both successors. Either it ends with a GIMPLE_COND
3046 and satisfies suitable_cond_bb, or ends with a cast and
3047 other_bb is that cast's successor. */
3048 FOR_EACH_EDGE (e, ei, first_bb->succs)
3049 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3050 || e->dest == first_bb)
3051 return;
3052 else if (single_pred_p (e->dest))
3054 stmt = last_stmt (e->dest);
3055 if (stmt
3056 && gimple_code (stmt) == GIMPLE_COND
3057 && EDGE_COUNT (e->dest->succs) == 2)
3059 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3060 break;
3061 else
3062 other_bb = NULL;
3064 else if (stmt
3065 && final_range_test_p (stmt)
3066 && find_edge (first_bb, single_succ (e->dest)))
3068 other_bb = single_succ (e->dest);
3069 if (other_bb == first_bb)
3070 other_bb = NULL;
3073 if (other_bb == NULL)
3074 return;
3076 /* Now do the forward search, moving last_bb to successor bbs
3077 that aren't other_bb. */
3078 while (EDGE_COUNT (last_bb->succs) == 2)
3080 FOR_EACH_EDGE (e, ei, last_bb->succs)
3081 if (e->dest != other_bb)
3082 break;
3083 if (e == NULL)
3084 break;
3085 if (!single_pred_p (e->dest))
3086 break;
3087 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3088 break;
3089 if (!no_side_effect_bb (e->dest))
3090 break;
3091 last_bb = e->dest;
3093 if (first_bb == last_bb)
3094 return;
3095 /* Here basic blocks first_bb through last_bb's predecessor
3096 end with GIMPLE_COND, all of them have one of the edges to
3097 other_bb and another to another block in the range,
3098 all blocks except first_bb don't have side-effects and
3099 last_bb ends with either GIMPLE_COND, or cast satisfying
3100 final_range_test_p. */
3101 for (bb = last_bb; ; bb = single_pred (bb))
3103 enum tree_code code;
3104 tree lhs, rhs;
3105 inter_bb_range_test_entry bb_ent;
3107 bb_ent.op = NULL_TREE;
3108 bb_ent.first_idx = ops.length ();
3109 bb_ent.last_idx = bb_ent.first_idx;
3110 e = find_edge (bb, other_bb);
3111 stmt = last_stmt (bb);
3112 gimple_set_visited (stmt, true);
3113 if (gimple_code (stmt) != GIMPLE_COND)
3115 use_operand_p use_p;
3116 gimple *phi;
3117 edge e2;
3118 unsigned int d;
3120 lhs = gimple_assign_lhs (stmt);
3121 rhs = gimple_assign_rhs1 (stmt);
3122 gcc_assert (bb == last_bb);
3124 /* stmt is
3125 _123 = (int) _234;
3127 followed by:
3128 <bb M>:
3129 # _345 = PHI <_123(N), 1(...), 1(...)>
3131 or 0 instead of 1. If it is 0, the _234
3132 range test is anded together with all the
3133 other range tests, if it is 1, it is ored with
3134 them. */
3135 single_imm_use (lhs, &use_p, &phi);
3136 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3137 e2 = find_edge (first_bb, other_bb);
3138 d = e2->dest_idx;
3139 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3140 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3141 code = BIT_AND_EXPR;
3142 else
3144 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3145 code = BIT_IOR_EXPR;
3148 /* If _234 SSA_NAME_DEF_STMT is
3149 _234 = _567 | _789;
3150 (or &, corresponding to 1/0 in the phi arguments),
3151 push into ops the individual range test arguments
3152 of the bitwise or resp. and, recursively. */
3153 if (!get_ops (rhs, code, &ops,
3154 loop_containing_stmt (stmt))
3155 && has_single_use (rhs))
3157 /* Otherwise, push the _234 range test itself. */
3158 operand_entry_t oe = operand_entry_pool.allocate ();
3160 oe->op = rhs;
3161 oe->rank = code;
3162 oe->id = 0;
3163 oe->count = 1;
3164 ops.safe_push (oe);
3165 bb_ent.last_idx++;
3167 else
3168 bb_ent.last_idx = ops.length ();
3169 bb_ent.op = rhs;
3170 bbinfo.safe_push (bb_ent);
3171 continue;
3173 /* Otherwise stmt is GIMPLE_COND. */
3174 code = gimple_cond_code (stmt);
3175 lhs = gimple_cond_lhs (stmt);
3176 rhs = gimple_cond_rhs (stmt);
3177 if (TREE_CODE (lhs) == SSA_NAME
3178 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3179 && ((code != EQ_EXPR && code != NE_EXPR)
3180 || rhs != boolean_false_node
3181 /* Either push into ops the individual bitwise
3182 or resp. and operands, depending on which
3183 edge is other_bb. */
3184 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3185 ^ (code == EQ_EXPR))
3186 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3187 loop_containing_stmt (stmt))))
3189 /* Or push the GIMPLE_COND stmt itself. */
3190 operand_entry_t oe = operand_entry_pool.allocate ();
3192 oe->op = NULL;
3193 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3194 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3195 /* oe->op = NULL signals that there is no SSA_NAME
3196 for the range test, and oe->id instead is the
3197 number of the basic block at whose end the GIMPLE_COND
3198 is. */
3199 oe->id = bb->index;
3200 oe->count = 1;
3201 ops.safe_push (oe);
3202 bb_ent.op = NULL;
3203 bb_ent.last_idx++;
3205 else if (ops.length () > bb_ent.first_idx)
3207 bb_ent.op = lhs;
3208 bb_ent.last_idx = ops.length ();
3210 bbinfo.safe_push (bb_ent);
3211 if (bb == first_bb)
3212 break;
3214 if (ops.length () > 1)
3215 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3216 if (any_changes)
3218 unsigned int idx;
3219 /* update_ops relies on the has_single_use predicates returning the
3220 same values as they did during get_ops earlier. Additionally it
3221 never removes statements, only adds new ones and it should walk
3222 from the single imm use and check the predicate already before
3223 making those changes.
3224 On the other side, the handling of GIMPLE_COND directly can turn
3225 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3226 it needs to be done in a separate loop afterwards. */
3227 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3229 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3230 && bbinfo[idx].op != NULL_TREE)
3232 tree new_op;
3234 stmt = last_stmt (bb);
3235 new_op = update_ops (bbinfo[idx].op,
3236 (enum tree_code)
3237 ops[bbinfo[idx].first_idx]->rank,
3238 ops, &bbinfo[idx].first_idx,
3239 loop_containing_stmt (stmt));
3240 if (new_op == NULL_TREE)
3242 gcc_assert (bb == last_bb);
3243 new_op = ops[bbinfo[idx].first_idx++]->op;
3245 if (bbinfo[idx].op != new_op)
3247 imm_use_iterator iter;
3248 use_operand_p use_p;
3249 gimple *use_stmt, *cast_stmt = NULL;
3251 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3252 if (is_gimple_debug (use_stmt))
3253 continue;
3254 else if (gimple_code (use_stmt) == GIMPLE_COND
3255 || gimple_code (use_stmt) == GIMPLE_PHI)
3256 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3257 SET_USE (use_p, new_op);
3258 else if (gimple_assign_cast_p (use_stmt))
3259 cast_stmt = use_stmt;
3260 else
3261 gcc_unreachable ();
3262 if (cast_stmt)
3264 gcc_assert (bb == last_bb);
3265 tree lhs = gimple_assign_lhs (cast_stmt);
3266 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3267 enum tree_code rhs_code
3268 = gimple_assign_rhs_code (cast_stmt);
3269 gassign *g;
3270 if (is_gimple_min_invariant (new_op))
3272 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3273 g = gimple_build_assign (new_lhs, new_op);
3275 else
3276 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3277 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3278 gimple_set_uid (g, gimple_uid (cast_stmt));
3279 gimple_set_visited (g, true);
3280 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3281 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3282 if (is_gimple_debug (use_stmt))
3283 continue;
3284 else if (gimple_code (use_stmt) == GIMPLE_COND
3285 || gimple_code (use_stmt) == GIMPLE_PHI)
3286 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3287 SET_USE (use_p, new_lhs);
3288 else
3289 gcc_unreachable ();
3293 if (bb == first_bb)
3294 break;
3296 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3298 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3299 && bbinfo[idx].op == NULL_TREE
3300 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3302 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3303 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3304 gimple_cond_make_false (cond_stmt);
3305 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3306 gimple_cond_make_true (cond_stmt);
3307 else
3309 gimple_cond_set_code (cond_stmt, NE_EXPR);
3310 gimple_cond_set_lhs (cond_stmt,
3311 ops[bbinfo[idx].first_idx]->op);
3312 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3314 update_stmt (cond_stmt);
3316 if (bb == first_bb)
3317 break;
3322 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3323 of STMT in its operands. This is also known as a "destructive
3324 update" operation. */
3326 static bool
3327 is_phi_for_stmt (gimple *stmt, tree operand)
3329 gimple *def_stmt;
3330 gphi *def_phi;
3331 tree lhs;
3332 use_operand_p arg_p;
3333 ssa_op_iter i;
3335 if (TREE_CODE (operand) != SSA_NAME)
3336 return false;
3338 lhs = gimple_assign_lhs (stmt);
3340 def_stmt = SSA_NAME_DEF_STMT (operand);
3341 def_phi = dyn_cast <gphi *> (def_stmt);
3342 if (!def_phi)
3343 return false;
3345 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3346 if (lhs == USE_FROM_PTR (arg_p))
3347 return true;
3348 return false;
3351 /* Remove def stmt of VAR if VAR has zero uses and recurse
3352 on rhs1 operand if so. */
3354 static void
3355 remove_visited_stmt_chain (tree var)
3357 gimple *stmt;
3358 gimple_stmt_iterator gsi;
3360 while (1)
3362 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3363 return;
3364 stmt = SSA_NAME_DEF_STMT (var);
3365 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3367 var = gimple_assign_rhs1 (stmt);
3368 gsi = gsi_for_stmt (stmt);
3369 reassoc_remove_stmt (&gsi);
3370 release_defs (stmt);
3372 else
3373 return;
3377 /* This function checks three consecutive operands in
3378 the passed operands vector OPS starting from OPINDEX and
3379 swaps two operands if it is profitable for the binary operation
3380 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3382 We pair ops with the same rank if possible.
3384 The alternative we try is to see if STMT is a destructive
3385 update style statement, which is like:
3386 b = phi (a, ...)
3387 a = c + b;
3388 In that case, we want to use the destructive update form to
3389 expose the possible vectorizer sum reduction opportunity.
3390 In that case, the third operand will be the phi node. This
3391 check is not performed if STMT is null.
3393 We could, of course, try to be better as noted above, and do a
3394 lot of work to try to find these opportunities in >3 operand
3395 cases, but it is unlikely to be worth it. */
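/* E.g. (ranks purely illustrative): if the three operands at
OPINDEX have ranks 4, 4 and 7, the first and third are swapped
so that the two rank-4 operands end up in the OPINDEX + 1 and
OPINDEX + 2 slots consumed by the next binary statement.  */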
3397 static void
3398 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3399 unsigned int opindex, gimple *stmt)
3401 operand_entry_t oe1, oe2, oe3;
3403 oe1 = ops[opindex];
3404 oe2 = ops[opindex + 1];
3405 oe3 = ops[opindex + 2];
3407 if ((oe1->rank == oe2->rank
3408 && oe2->rank != oe3->rank)
3409 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3410 && !is_phi_for_stmt (stmt, oe1->op)
3411 && !is_phi_for_stmt (stmt, oe2->op)))
3413 struct operand_entry temp = *oe3;
3414 oe3->op = oe1->op;
3415 oe3->rank = oe1->rank;
3416 oe1->op = temp.op;
3417 oe1->rank= temp.rank;
3419 else if ((oe1->rank == oe3->rank
3420 && oe2->rank != oe3->rank)
3421 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3422 && !is_phi_for_stmt (stmt, oe1->op)
3423 && !is_phi_for_stmt (stmt, oe3->op)))
3425 struct operand_entry temp = *oe2;
3426 oe2->op = oe1->op;
3427 oe2->rank = oe1->rank;
3428 oe1->op = temp.op;
3429 oe1->rank = temp.rank;
3433 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3434 two definitions, otherwise return STMT. */
3436 static inline gimple *
3437 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3439 if (TREE_CODE (rhs1) == SSA_NAME
3440 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3441 stmt = SSA_NAME_DEF_STMT (rhs1);
3442 if (TREE_CODE (rhs2) == SSA_NAME
3443 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3444 stmt = SSA_NAME_DEF_STMT (rhs2);
3445 return stmt;
3448 /* Recursively rewrite our linearized statements so that the operators
3449 match those in OPS[OPINDEX], putting the computation in rank
3450 order. Return new lhs. */
3452 static tree
3453 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3454 vec<operand_entry_t> ops, bool changed)
3456 tree rhs1 = gimple_assign_rhs1 (stmt);
3457 tree rhs2 = gimple_assign_rhs2 (stmt);
3458 tree lhs = gimple_assign_lhs (stmt);
3459 operand_entry_t oe;
3461 /* The final recursion case for this function is that you have
3462 exactly two operations left.
3463 If we had exactly one op in the entire list to start with, we
3464 would have never called this function, and the tail recursion
3465 rewrites them one at a time. */
3466 if (opindex + 2 == ops.length ())
3468 operand_entry_t oe1, oe2;
3470 oe1 = ops[opindex];
3471 oe2 = ops[opindex + 1];
3473 if (rhs1 != oe1->op || rhs2 != oe2->op)
3475 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3476 unsigned int uid = gimple_uid (stmt);
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3480 fprintf (dump_file, "Transforming ");
3481 print_gimple_stmt (dump_file, stmt, 0, 0);
3484 /* Even when changed is false, reassociation could have e.g. removed
3485 some redundant operations, so unless we are just swapping the
3486 arguments or unless there is no change at all (then we just
3487 return lhs), force creation of a new SSA_NAME. */
3488 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3490 gimple *insert_point
3491 = find_insert_point (stmt, oe1->op, oe2->op);
3492 lhs = make_ssa_name (TREE_TYPE (lhs));
3493 stmt
3494 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3495 oe1->op, oe2->op);
3496 gimple_set_uid (stmt, uid);
3497 gimple_set_visited (stmt, true);
3498 if (insert_point == gsi_stmt (gsi))
3499 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3500 else
3501 insert_stmt_after (stmt, insert_point);
3503 else
3505 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3506 == stmt);
3507 gimple_assign_set_rhs1 (stmt, oe1->op);
3508 gimple_assign_set_rhs2 (stmt, oe2->op);
3509 update_stmt (stmt);
3512 if (rhs1 != oe1->op && rhs1 != oe2->op)
3513 remove_visited_stmt_chain (rhs1);
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3517 fprintf (dump_file, " into ");
3518 print_gimple_stmt (dump_file, stmt, 0, 0);
3521 return lhs;
3524 /* If we hit here, we should have 3 or more ops left. */
3525 gcc_assert (opindex + 2 < ops.length ());
3527 /* Rewrite the next operator. */
3528 oe = ops[opindex];
3530 /* Recurse on the LHS of the binary operator, which is guaranteed to
3531 be the non-leaf side. */
3532 tree new_rhs1
3533 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3534 changed || oe->op != rhs2);
3536 if (oe->op != rhs2 || new_rhs1 != rhs1)
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3540 fprintf (dump_file, "Transforming ");
3541 print_gimple_stmt (dump_file, stmt, 0, 0);
3544 /* If changed is false, this is either opindex == 0
3545 or all outer rhs2's were equal to corresponding oe->op,
3546 and powi_result is NULL.
3547 That means lhs is equivalent before and after reassociation.
3548 Otherwise ensure the old lhs SSA_NAME is not reused and
3549 create a new stmt as well, so that any debug stmts will be
3550 properly adjusted. */
3551 if (changed)
3553 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3554 unsigned int uid = gimple_uid (stmt);
3555 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3557 lhs = make_ssa_name (TREE_TYPE (lhs));
3558 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3559 new_rhs1, oe->op);
3560 gimple_set_uid (stmt, uid);
3561 gimple_set_visited (stmt, true);
3562 if (insert_point == gsi_stmt (gsi))
3563 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3564 else
3565 insert_stmt_after (stmt, insert_point);
3567 else
3569 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3570 == stmt);
3571 gimple_assign_set_rhs1 (stmt, new_rhs1);
3572 gimple_assign_set_rhs2 (stmt, oe->op);
3573 update_stmt (stmt);
3576 if (dump_file && (dump_flags & TDF_DETAILS))
3578 fprintf (dump_file, " into ");
3579 print_gimple_stmt (dump_file, stmt, 0, 0);
3582 return lhs;
3585 /* Find out how many cycles we need to compute a statement chain.
3586 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3587 maximum number of independent statements we may execute per cycle. */
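/* As a purely illustrative example, for OPS_NUM == 10 and
CPU_WIDTH == 2 the computation below yields 10 / 4 = 2 cycles
for the wide phase, leaving 10 - 2 * 2 = 6 operands, plus
floor_log2 (6) + 1 = 3 cycles to reduce those to one, i.e. 5
cycles in total.  */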
3589 static int
3590 get_required_cycles (int ops_num, int cpu_width)
3592 int res;
3593 int elog;
3594 unsigned int rest;
3596 /* While we have more than 2 * cpu_width operands
3597 we may reduce the number of operands by cpu_width
3598 per cycle. */
3599 res = ops_num / (2 * cpu_width);
3601 /* The remaining operand count may be reduced twice per cycle
3602 until we have only one operand. */
3603 rest = (unsigned)(ops_num - res * cpu_width);
3604 elog = exact_log2 (rest);
3605 if (elog >= 0)
3606 res += elog;
3607 else
3608 res += floor_log2 (rest) + 1;
3610 return res;
3613 /* Returns an optimal number of registers to use for computation of
3614 the given statements. */
3616 static int
3617 get_reassociation_width (int ops_num, enum tree_code opc,
3618 machine_mode mode)
3620 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3621 int width;
3622 int width_min;
3623 int cycles_best;
3625 if (param_width > 0)
3626 width = param_width;
3627 else
3628 width = targetm.sched.reassociation_width (opc, mode);
3630 if (width == 1)
3631 return width;
3633 /* Get the minimal time required for sequence computation. */
3634 cycles_best = get_required_cycles (ops_num, width);
3636 /* Check if we may use less width and still compute sequence for
3637 the same time. It will allow us to reduce registers usage.
3638 get_required_cycles is monotonically increasing with lower width
3639 so we can perform a binary search for the minimal width that still
3640 results in the optimal cycle count. */
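/* As a purely illustrative example: with 10 operands and a width
of 4 from the target hook, get_required_cycles gives 4 cycles
for widths 4 and 3 but 5 cycles for width 2, so the search
below settles on a width of 3.  */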
3641 width_min = 1;
3642 while (width > width_min)
3644 int width_mid = (width + width_min) / 2;
3646 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3647 width = width_mid;
3648 else if (width_min < width_mid)
3649 width_min = width_mid;
3650 else
3651 break;
3654 return width;
3657 /* Recursively rewrite our linearized statements so that the operators
3658 match those in OPS[OPINDEX], putting the computation in rank
3659 order and trying to allow operations to be executed in
3660 parallel. */
3662 static void
3663 rewrite_expr_tree_parallel (gassign *stmt, int width,
3664 vec<operand_entry_t> ops)
3666 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3667 int op_num = ops.length ();
3668 int stmt_num = op_num - 1;
3669 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3670 int op_index = op_num - 1;
3671 int stmt_index = 0;
3672 int ready_stmts_end = 0;
3673 int i = 0;
3674 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3676 /* We start expression rewriting from the top statements.
3677 So, in this loop we create a full list of statements
3678 we will work with. */
3679 stmts[stmt_num - 1] = stmt;
3680 for (i = stmt_num - 2; i >= 0; i--)
3681 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3683 for (i = 0; i < stmt_num; i++)
3685 tree op1, op2;
3687 /* Determine whether we should use results of
3688 already handled statements or not. */
3689 if (ready_stmts_end == 0
3690 && (i - stmt_index >= width || op_index < 1))
3691 ready_stmts_end = i;
3693 /* Now we choose operands for the next statement. A nonzero
3694 value in ready_stmts_end means here that we should use
3695 the result of already generated statements as a new operand. */
3696 if (ready_stmts_end > 0)
3698 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3699 if (ready_stmts_end > stmt_index)
3700 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3701 else if (op_index >= 0)
3702 op2 = ops[op_index--]->op;
3703 else
3705 gcc_assert (stmt_index < i);
3706 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3709 if (stmt_index >= ready_stmts_end)
3710 ready_stmts_end = 0;
3712 else
3714 if (op_index > 1)
3715 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3716 op2 = ops[op_index--]->op;
3717 op1 = ops[op_index--]->op;
3720 /* If we emit the last statement then we should put
3721 operands into the last statement. It will also
3722 break the loop. */
3723 if (op_index < 0 && stmt_index == i)
3724 i = stmt_num - 1;
3726 if (dump_file && (dump_flags & TDF_DETAILS))
3728 fprintf (dump_file, "Transforming ");
3729 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3732 /* We keep the original statement only for the last one. All
3733 others are recreated. */
3734 if (i == stmt_num - 1)
3736 gimple_assign_set_rhs1 (stmts[i], op1);
3737 gimple_assign_set_rhs2 (stmts[i], op2);
3738 update_stmt (stmts[i]);
3740 else
3741 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3745 fprintf (dump_file, " into ");
3746 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3750 remove_visited_stmt_chain (last_rhs1);
3753 /* Transform STMT, which is really (A + B) + (C + D) into the left
3754 linear form, ((A+B)+C)+D.
3755 Recurse on D if necessary. */
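/* E.g. (SSA names purely illustrative): for x_1 = a + b,
y_2 = c + d, z_3 = x_1 + y_2, the code below rewrites the tree
so that only one operand of each addition is itself a
reassociable addition, giving a left-linear chain.  */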
3757 static void
3758 linearize_expr (gimple *stmt)
3760 gimple_stmt_iterator gsi;
3761 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3762 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3763 gimple *oldbinrhs = binrhs;
3764 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3765 gimple *newbinrhs = NULL;
3766 struct loop *loop = loop_containing_stmt (stmt);
3767 tree lhs = gimple_assign_lhs (stmt);
3769 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3770 && is_reassociable_op (binrhs, rhscode, loop));
3772 gsi = gsi_for_stmt (stmt);
3774 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3775 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3776 gimple_assign_rhs_code (binrhs),
3777 gimple_assign_lhs (binlhs),
3778 gimple_assign_rhs2 (binrhs));
3779 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3780 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3781 gimple_set_uid (binrhs, gimple_uid (stmt));
3783 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3784 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3786 if (dump_file && (dump_flags & TDF_DETAILS))
3788 fprintf (dump_file, "Linearized: ");
3789 print_gimple_stmt (dump_file, stmt, 0, 0);
3792 reassociate_stats.linearized++;
3793 update_stmt (stmt);
3795 gsi = gsi_for_stmt (oldbinrhs);
3796 reassoc_remove_stmt (&gsi);
3797 release_defs (oldbinrhs);
3799 gimple_set_visited (stmt, true);
3800 gimple_set_visited (binlhs, true);
3801 gimple_set_visited (binrhs, true);
3803 /* Tail recurse on the new rhs if it still needs reassociation. */
3804 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3805 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3806 want to change the algorithm while converting to tuples. */
3807 linearize_expr (stmt);
3810 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3811 it. Otherwise, return NULL. */
3813 static gimple *
3814 get_single_immediate_use (tree lhs)
3816 use_operand_p immuse;
3817 gimple *immusestmt;
3819 if (TREE_CODE (lhs) == SSA_NAME
3820 && single_imm_use (lhs, &immuse, &immusestmt)
3821 && is_gimple_assign (immusestmt))
3822 return immusestmt;
3824 return NULL;
3827 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3828 representing the negated value. Insertions of any necessary
3829 instructions go before GSI.
3830 This function is recursive in that, if you hand it "a_5" as the
3831 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3832 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3834 static tree
3835 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3837 gimple *negatedefstmt = NULL;
3838 tree resultofnegate;
3839 gimple_stmt_iterator gsi;
3840 unsigned int uid;
3842 /* If we are trying to negate a name, defined by an add, negate the
3843 add operands instead. */
3844 if (TREE_CODE (tonegate) == SSA_NAME)
3845 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3846 if (TREE_CODE (tonegate) == SSA_NAME
3847 && is_gimple_assign (negatedefstmt)
3848 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3849 && has_single_use (gimple_assign_lhs (negatedefstmt))
3850 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3852 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3853 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3854 tree lhs = gimple_assign_lhs (negatedefstmt);
3855 gimple *g;
3857 gsi = gsi_for_stmt (negatedefstmt);
3858 rhs1 = negate_value (rhs1, &gsi);
3860 gsi = gsi_for_stmt (negatedefstmt);
3861 rhs2 = negate_value (rhs2, &gsi);
3863 gsi = gsi_for_stmt (negatedefstmt);
3864 lhs = make_ssa_name (TREE_TYPE (lhs));
3865 gimple_set_visited (negatedefstmt, true);
3866 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3867 gimple_set_uid (g, gimple_uid (negatedefstmt));
3868 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3869 return lhs;
3872 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3873 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3874 NULL_TREE, true, GSI_SAME_STMT);
3875 gsi = *gsip;
3876 uid = gimple_uid (gsi_stmt (gsi));
3877 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3879 gimple *stmt = gsi_stmt (gsi);
3880 if (gimple_uid (stmt) != 0)
3881 break;
3882 gimple_set_uid (stmt, uid);
3884 return resultofnegate;
3887 /* Return true if we should break up the subtract in STMT into an add
3888 with negate. This is true when the subtract operands are really
3889 adds, or the subtract itself is used in an add expression. In
3890 either case, breaking up the subtract into an add with negate
3891 exposes the adds to reassociation. */
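/* E.g. (names purely illustrative): for t_1 = a - b followed by
x_2 = t_1 + c, rewriting the subtract as t_1 = a + (-b) lets the
whole computation be linearized into a single PLUS_EXPR chain
over a, -b and c.  */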
3893 static bool
3894 should_break_up_subtract (gimple *stmt)
3896 tree lhs = gimple_assign_lhs (stmt);
3897 tree binlhs = gimple_assign_rhs1 (stmt);
3898 tree binrhs = gimple_assign_rhs2 (stmt);
3899 gimple *immusestmt;
3900 struct loop *loop = loop_containing_stmt (stmt);
3902 if (TREE_CODE (binlhs) == SSA_NAME
3903 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3904 return true;
3906 if (TREE_CODE (binrhs) == SSA_NAME
3907 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3908 return true;
3910 if (TREE_CODE (lhs) == SSA_NAME
3911 && (immusestmt = get_single_immediate_use (lhs))
3912 && is_gimple_assign (immusestmt)
3913 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3914 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3915 return true;
3916 return false;
3919 /* Transform STMT from A - B into A + -B. */
3921 static void
3922 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
3924 tree rhs1 = gimple_assign_rhs1 (stmt);
3925 tree rhs2 = gimple_assign_rhs2 (stmt);
3927 if (dump_file && (dump_flags & TDF_DETAILS))
3929 fprintf (dump_file, "Breaking up subtract ");
3930 print_gimple_stmt (dump_file, stmt, 0, 0);
3933 rhs2 = negate_value (rhs2, gsip);
3934 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3935 update_stmt (stmt);
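/* Illustrative aside, not part of the pass: break_up_subtract rewrites
   A - B as A + -B so that the addition becomes visible to the rest of
   the reassociation machinery.  A minimal standalone sketch of the
   source-level identity follows, using an unsigned (wrapping) type for
   which the rewrite is unconditionally safe; the function name is
   hypothetical.  */

static unsigned long
toy_sub_as_add (unsigned long a, unsigned long b)
{
  /* a - b == a + (-b) for wrapping arithmetic, which is one reason
     can_reassociate_p below requires TYPE_OVERFLOW_WRAPS for
     integral types.  */
  return a + (0UL - b);
}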
3938 /* Determine whether STMT is a builtin call that raises an SSA name
3939 to an integer power and has only one use. If so, and this is early
3940 reassociation and unsafe math optimizations are permitted, place
3941 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3942 If any of these conditions does not hold, return FALSE. */
3944 static bool
3945 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
3947 tree fndecl, arg1;
3948 REAL_VALUE_TYPE c, cint;
3950 if (!first_pass_instance
3951 || !flag_unsafe_math_optimizations
3952 || !is_gimple_call (stmt)
3953 || !has_single_use (gimple_call_lhs (stmt)))
3954 return false;
3956 fndecl = gimple_call_fndecl (stmt);
3958 if (!fndecl
3959 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3960 return false;
3962 switch (DECL_FUNCTION_CODE (fndecl))
3964 CASE_FLT_FN (BUILT_IN_POW):
3965 if (flag_errno_math)
3966 return false;
3968 *base = gimple_call_arg (stmt, 0);
3969 arg1 = gimple_call_arg (stmt, 1);
3971 if (TREE_CODE (arg1) != REAL_CST)
3972 return false;
3974 c = TREE_REAL_CST (arg1);
3976 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3977 return false;
3979 *exponent = real_to_integer (&c);
3980 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3981 if (!real_identical (&c, &cint))
3982 return false;
3984 break;
3986 CASE_FLT_FN (BUILT_IN_POWI):
3987 *base = gimple_call_arg (stmt, 0);
3988 arg1 = gimple_call_arg (stmt, 1);
3990 if (!tree_fits_shwi_p (arg1))
3991 return false;
3993 *exponent = tree_to_shwi (arg1);
3994 break;
3996 default:
3997 return false;
4000 /* Expanding negative exponents is generally unproductive, so we don't
4001 complicate matters with those. Exponents of zero and one should
4002 have been handled by expression folding. */
4003 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4004 return false;
4006 return true;
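/* Illustrative aside, not part of the pass: a rough standalone sketch of
   the exponent test performed by acceptable_pow_call above.  pow (x, c)
   qualifies only when c is a constant that is exactly an integer
   (3.0 yes, 3.5 no) and at least 2; negative, zero and unit exponents
   are rejected.  The 1024 bound and the function name are hypothetical
   simplifications; the real code bounds the exponent via REAL_EXP.  */

static int
toy_acceptable_pow_exponent (double c, long *exponent)
{
  /* Bound the exponent first so the cast below is well defined;
     this also encodes the "at least 2" requirement.  */
  if (!(c >= 2.0 && c <= 1024.0))
    return 0;

  *exponent = (long) c;

  /* Reject exponents with a fractional part, mirroring the
     real_identical (&c, &cint) check above.  */
  return (double) *exponent == c;
}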
4009 /* Recursively linearize a binary expression that is the RHS of STMT.
4010 Place the operands of the expression tree in the vector named OPS. */
4012 static void
4013 linearize_expr_tree (vec<operand_entry_t> *ops, gimple *stmt,
4014 bool is_associative, bool set_visited)
4016 tree binlhs = gimple_assign_rhs1 (stmt);
4017 tree binrhs = gimple_assign_rhs2 (stmt);
4018 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4019 bool binlhsisreassoc = false;
4020 bool binrhsisreassoc = false;
4021 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4022 struct loop *loop = loop_containing_stmt (stmt);
4023 tree base = NULL_TREE;
4024 HOST_WIDE_INT exponent = 0;
4026 if (set_visited)
4027 gimple_set_visited (stmt, true);
4029 if (TREE_CODE (binlhs) == SSA_NAME)
4031 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4032 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4033 && !stmt_could_throw_p (binlhsdef));
4036 if (TREE_CODE (binrhs) == SSA_NAME)
4038 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4039 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4040 && !stmt_could_throw_p (binrhsdef));
4043 /* If the LHS is not reassociable, but the RHS is, we need to swap
4044 them. If neither is reassociable, there is nothing we can do, so
4045 just put them in the ops vector. If the LHS is reassociable,
4046 linearize it. If both are reassociable, then linearize the RHS
4047 and the LHS. */
4049 if (!binlhsisreassoc)
4051      /* If this is not an associative operation like division, give up.  */
4052 if (!is_associative)
4054 add_to_ops_vec (ops, binrhs);
4055 return;
4058 if (!binrhsisreassoc)
4060 if (rhscode == MULT_EXPR
4061 && TREE_CODE (binrhs) == SSA_NAME
4062 && acceptable_pow_call (binrhsdef, &base, &exponent))
4064 add_repeat_to_ops_vec (ops, base, exponent);
4065 gimple_set_visited (binrhsdef, true);
4067 else
4068 add_to_ops_vec (ops, binrhs);
4070 if (rhscode == MULT_EXPR
4071 && TREE_CODE (binlhs) == SSA_NAME
4072 && acceptable_pow_call (binlhsdef, &base, &exponent))
4074 add_repeat_to_ops_vec (ops, base, exponent);
4075 gimple_set_visited (binlhsdef, true);
4077 else
4078 add_to_ops_vec (ops, binlhs);
4080 return;
4083 if (dump_file && (dump_flags & TDF_DETAILS))
4085 fprintf (dump_file, "swapping operands of ");
4086 print_gimple_stmt (dump_file, stmt, 0, 0);
4089 swap_ssa_operands (stmt,
4090 gimple_assign_rhs1_ptr (stmt),
4091 gimple_assign_rhs2_ptr (stmt));
4092 update_stmt (stmt);
4094 if (dump_file && (dump_flags & TDF_DETAILS))
4096 fprintf (dump_file, " is now ");
4097 print_gimple_stmt (dump_file, stmt, 0, 0);
4100 /* We want to make it so the lhs is always the reassociative op,
4101 so swap. */
4102 std::swap (binlhs, binrhs);
4104 else if (binrhsisreassoc)
4106 linearize_expr (stmt);
4107 binlhs = gimple_assign_rhs1 (stmt);
4108 binrhs = gimple_assign_rhs2 (stmt);
4111 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4112 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4113 rhscode, loop));
4114 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4115 is_associative, set_visited);
4117 if (rhscode == MULT_EXPR
4118 && TREE_CODE (binrhs) == SSA_NAME
4119 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4121 add_repeat_to_ops_vec (ops, base, exponent);
4122 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4124 else
4125 add_to_ops_vec (ops, binrhs);
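/* Illustrative aside, not part of the pass: linearize_expr_tree above
   walks a left-linearized chain such as (((A + B) + C) + D) and appends
   the leaf operands A, B, C, D to the OPS vector.  A minimal standalone
   C sketch of that leaf collection on a toy tree follows; the type and
   function names (toy_node, toy_collect_leaves) are hypothetical and the
   swapping and powi handling of the real function are omitted.  */

struct toy_node
{
  int is_op;              /* Nonzero for an interior (e.g. PLUS) node.  */
  const char *leaf_name;  /* Leaf operand name, used when !is_op.  */
  struct toy_node *lhs;   /* Children, used when is_op.  */
  struct toy_node *rhs;
};

/* Append the leaves of TREE to OUT (capacity CAP), starting at index N,
   and return the new number of collected leaves, left to right.  */
static unsigned
toy_collect_leaves (const struct toy_node *tree, const char **out,
		    unsigned cap, unsigned n)
{
  if (n >= cap)
    return n;
  if (!tree->is_op)
    {
      out[n++] = tree->leaf_name;
      return n;
    }
  n = toy_collect_leaves (tree->lhs, out, cap, n);
  return toy_collect_leaves (tree->rhs, out, cap, n);
}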
4128 /* Repropagate the negates back into subtracts, since no other pass
4129 currently does it. */
4131 static void
4132 repropagate_negates (void)
4134 unsigned int i = 0;
4135 tree negate;
4137 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4139 gimple *user = get_single_immediate_use (negate);
4141 if (!user || !is_gimple_assign (user))
4142 continue;
4144 /* The negate operand can be either operand of a PLUS_EXPR
4145 (it can be the LHS if the RHS is a constant for example).
4147 Force the negate operand to the RHS of the PLUS_EXPR, then
4148 transform the PLUS_EXPR into a MINUS_EXPR. */
4149 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4151 /* If the negated operand appears on the LHS of the
4152 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4153 to force the negated operand to the RHS of the PLUS_EXPR. */
4154 if (gimple_assign_rhs1 (user) == negate)
4156 swap_ssa_operands (user,
4157 gimple_assign_rhs1_ptr (user),
4158 gimple_assign_rhs2_ptr (user));
4161 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4162 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4163 if (gimple_assign_rhs2 (user) == negate)
4165 tree rhs1 = gimple_assign_rhs1 (user);
4166 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4167 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4168 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4169 update_stmt (user);
4172 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4174 if (gimple_assign_rhs1 (user) == negate)
4176 /* We have
4177 x = -a
4178 y = x - b
4179 which we transform into
4180 x = a + b
4181 y = -x .
4182 This pushes down the negate which we possibly can merge
4183 into some other operation, hence insert it into the
4184 plus_negates vector. */
4185 gimple *feed = SSA_NAME_DEF_STMT (negate);
4186 tree a = gimple_assign_rhs1 (feed);
4187 tree b = gimple_assign_rhs2 (user);
4188 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4189 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4190 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4191 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4192 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4193 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4194 user = gsi_stmt (gsi2);
4195 update_stmt (user);
4196 reassoc_remove_stmt (&gsi);
4197 release_defs (feed);
4198 plus_negates.safe_push (gimple_assign_lhs (user));
4200 else
4202 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4203 rid of one operation. */
4204 gimple *feed = SSA_NAME_DEF_STMT (negate);
4205 tree a = gimple_assign_rhs1 (feed);
4206 tree rhs1 = gimple_assign_rhs1 (user);
4207 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4208 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4209 update_stmt (gsi_stmt (gsi));
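/* Illustrative aside, not part of the pass: the two MINUS_EXPR rewrites
   above correspond to the arithmetic identities (-a) - b == -(a + b) and
   b - (-a) == b + a.  A minimal standalone sketch with wrapping unsigned
   arithmetic, where both identities hold unconditionally; the function
   names are hypothetical.  */

static unsigned long
toy_neg_minus (unsigned long a, unsigned long b)
{
  /* (-a) - b rewritten as -(a + b): the negate is pushed below the
     addition, matching the "x = a + b; y = -x" form above.  */
  return 0UL - (a + b);
}

static unsigned long
toy_minus_neg (unsigned long a, unsigned long b)
{
  /* b - (-a) rewritten as b + a, removing one operation entirely.  */
  return b + a;
}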
4215 /* Returns true if OP is of a type for which we can do reassociation.
4216    That is for integral types with wrapping overflow, non-saturating
4217    fixed-point types, and floating point types when associative-math is enabled. */
4219 static bool
4220 can_reassociate_p (tree op)
4222 tree type = TREE_TYPE (op);
4223 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4224 || NON_SAT_FIXED_POINT_TYPE_P (type)
4225 || (flag_associative_math && FLOAT_TYPE_P (type)))
4226 return true;
4227 return false;
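/* Illustrative aside, not part of the pass: the flag_associative_math
   condition above exists because floating-point addition is not
   associative.  A minimal standalone demonstration: with IEEE double and
   round-to-nearest, (1e16 + 1.0) + 1.0 stays at 1e16 while
   1e16 + (1.0 + 1.0) yields 1e16 + 2, so the difference below is
   typically 2.0 rather than 0.0.  */

static double
toy_fp_assoc_difference (void)
{
  volatile double big = 1e16, one = 1.0;
  double left = (big + one) + one;   /* Each partial sum rounds; 1.0 is lost twice.  */
  double right = big + (one + one);  /* 2.0 survives: 1e16 + 2 is representable.  */
  return right - left;
}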
4230 /* Break up subtract operations in block BB.
4232 We do this top down because we don't know whether the subtract is
4233 part of a possible chain of reassociation except at the top.
4235 IE given
4236 d = f + g
4237 c = a + e
4238 b = c - d
4239 q = b - r
4240 k = t - q
4242 we want to break up k = t - q, but we won't until we've transformed q
4243 = b - r, which won't be broken up until we transform b = c - d.
4245 En passant, clear the GIMPLE visited flag on every statement
4246 and set UIDs within each basic block. */
4248 static void
4249 break_up_subtract_bb (basic_block bb)
4251 gimple_stmt_iterator gsi;
4252 basic_block son;
4253 unsigned int uid = 1;
4255 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4257 gimple *stmt = gsi_stmt (gsi);
4258 gimple_set_visited (stmt, false);
4259 gimple_set_uid (stmt, uid++);
4261 if (!is_gimple_assign (stmt)
4262 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4263 continue;
4265 /* Look for simple gimple subtract operations. */
4266 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4268 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4269 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4270 continue;
4272 /* Check for a subtract used only in an addition. If this
4273 is the case, transform it into add of a negate for better
4274 reassociation. IE transform C = A-B into C = A + -B if C
4275 is only used in an addition. */
4276 if (should_break_up_subtract (stmt))
4277 break_up_subtract (stmt, &gsi);
4279 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4280 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4281 plus_negates.safe_push (gimple_assign_lhs (stmt));
4283 for (son = first_dom_son (CDI_DOMINATORS, bb);
4284 son;
4285 son = next_dom_son (CDI_DOMINATORS, son))
4286 break_up_subtract_bb (son);
4289 /* Used for repeated factor analysis. */
4290 struct repeat_factor_d
4292 /* An SSA name that occurs in a multiply chain. */
4293 tree factor;
4295 /* Cached rank of the factor. */
4296 unsigned rank;
4298 /* Number of occurrences of the factor in the chain. */
4299 HOST_WIDE_INT count;
4301 /* An SSA name representing the product of this factor and
4302 all factors appearing later in the repeated factor vector. */
4303 tree repr;
4306 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4307 typedef const struct repeat_factor_d *const_repeat_factor_t;
4310 static vec<repeat_factor> repeat_factor_vec;
4312 /* Used for sorting the repeat factor vector. Sort primarily by
4313 ascending occurrence count, secondarily by descending rank. */
4315 static int
4316 compare_repeat_factors (const void *x1, const void *x2)
4318 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4319 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4321 if (rf1->count != rf2->count)
4322 return rf1->count - rf2->count;
4324 return rf2->rank - rf1->rank;
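/* Illustrative aside, not part of the pass: a standalone sketch of the
   sort order produced by compare_repeat_factors above, using plain ints
   and the C library qsort.  Factors are ordered by ascending occurrence
   count and, within equal counts, by descending rank; the type and
   function names here are hypothetical.  */

#include <stdlib.h>

struct toy_factor
{
  int count;  /* Occurrence count of the factor.  */
  int rank;   /* Cached rank of the factor.  */
};

static int
toy_compare_factors (const void *x1, const void *x2)
{
  const struct toy_factor *f1 = (const struct toy_factor *) x1;
  const struct toy_factor *f2 = (const struct toy_factor *) x2;

  if (f1->count != f2->count)
    return f1->count - f2->count;   /* Ascending count.  */
  return f2->rank - f1->rank;       /* Descending rank within a count.  */
}

static void
toy_sort_factors (struct toy_factor *factors, size_t n)
{
  qsort (factors, n, sizeof (struct toy_factor), toy_compare_factors);
}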
4327 /* Look for repeated operands in OPS in the multiply tree rooted at
4328 STMT. Replace them with an optimal sequence of multiplies and powi
4329 builtin calls, and remove the used operands from OPS. Return an
4330 SSA name representing the value of the replacement sequence. */
4332 static tree
4333 attempt_builtin_powi (gimple *stmt, vec<operand_entry_t> *ops)
4335 unsigned i, j, vec_len;
4336 int ii;
4337 operand_entry_t oe;
4338 repeat_factor_t rf1, rf2;
4339 repeat_factor rfnew;
4340 tree result = NULL_TREE;
4341 tree target_ssa, iter_result;
4342 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4343 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4344 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4345 gimple *mul_stmt, *pow_stmt;
4347 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4348 target. */
4349 if (!powi_fndecl)
4350 return NULL_TREE;
4352 /* Allocate the repeated factor vector. */
4353 repeat_factor_vec.create (10);
4355 /* Scan the OPS vector for all SSA names in the product and build
4356 up a vector of occurrence counts for each factor. */
4357 FOR_EACH_VEC_ELT (*ops, i, oe)
4359 if (TREE_CODE (oe->op) == SSA_NAME)
4361 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4363 if (rf1->factor == oe->op)
4365 rf1->count += oe->count;
4366 break;
4370 if (j >= repeat_factor_vec.length ())
4372 rfnew.factor = oe->op;
4373 rfnew.rank = oe->rank;
4374 rfnew.count = oe->count;
4375 rfnew.repr = NULL_TREE;
4376 repeat_factor_vec.safe_push (rfnew);
4381 /* Sort the repeated factor vector by (a) increasing occurrence count,
4382 and (b) decreasing rank. */
4383 repeat_factor_vec.qsort (compare_repeat_factors);
4385 /* It is generally best to combine as many base factors as possible
4386 into a product before applying __builtin_powi to the result.
4387 However, the sort order chosen for the repeated factor vector
4388 allows us to cache partial results for the product of the base
4389 factors for subsequent use. When we already have a cached partial
4390 result from a previous iteration, it is best to make use of it
4391 before looking for another __builtin_pow opportunity.
4393 As an example, consider x * x * y * y * y * z * z * z * z.
4394 We want to first compose the product x * y * z, raise it to the
4395 second power, then multiply this by y * z, and finally multiply
4396 by z. This can be done in 5 multiplies provided we cache y * z
4397 for use in both expressions:
4399 t1 = y * z
4400 t2 = t1 * x
4401 t3 = t2 * t2
4402 t4 = t1 * t3
4403 result = t4 * z
4405 If we instead ignored the cached y * z and first multiplied by
4406 the __builtin_pow opportunity z * z, we would get the inferior:
4408 t1 = y * z
4409 t2 = t1 * x
4410 t3 = t2 * t2
4411 t4 = z * z
4412 t5 = t3 * t4
4413 result = t5 * y */
4415 vec_len = repeat_factor_vec.length ();
4417 /* Repeatedly look for opportunities to create a builtin_powi call. */
4418 while (true)
4420 HOST_WIDE_INT power;
4422 /* First look for the largest cached product of factors from
4423 preceding iterations. If found, create a builtin_powi for
4424 it if the minimum occurrence count for its factors is at
4425 least 2, or just use this cached product as our next
4426 multiplicand if the minimum occurrence count is 1. */
4427 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4429 if (rf1->repr && rf1->count > 0)
4430 break;
4433 if (j < vec_len)
4435 power = rf1->count;
4437 if (power == 1)
4439 iter_result = rf1->repr;
4441 if (dump_file && (dump_flags & TDF_DETAILS))
4443 unsigned elt;
4444 repeat_factor_t rf;
4445 fputs ("Multiplying by cached product ", dump_file);
4446 for (elt = j; elt < vec_len; elt++)
4448 rf = &repeat_factor_vec[elt];
4449 print_generic_expr (dump_file, rf->factor, 0);
4450 if (elt < vec_len - 1)
4451 fputs (" * ", dump_file);
4453 fputs ("\n", dump_file);
4456 else
4458 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4459 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4460 build_int_cst (integer_type_node,
4461 power));
4462 gimple_call_set_lhs (pow_stmt, iter_result);
4463 gimple_set_location (pow_stmt, gimple_location (stmt));
4464 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4466 if (dump_file && (dump_flags & TDF_DETAILS))
4468 unsigned elt;
4469 repeat_factor_t rf;
4470 fputs ("Building __builtin_pow call for cached product (",
4471 dump_file);
4472 for (elt = j; elt < vec_len; elt++)
4474 rf = &repeat_factor_vec[elt];
4475 print_generic_expr (dump_file, rf->factor, 0);
4476 if (elt < vec_len - 1)
4477 fputs (" * ", dump_file);
4479 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4480 power);
4484 else
4486 /* Otherwise, find the first factor in the repeated factor
4487 vector whose occurrence count is at least 2. If no such
4488 factor exists, there are no builtin_powi opportunities
4489 remaining. */
4490 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4492 if (rf1->count >= 2)
4493 break;
4496 if (j >= vec_len)
4497 break;
4499 power = rf1->count;
4501 if (dump_file && (dump_flags & TDF_DETAILS))
4503 unsigned elt;
4504 repeat_factor_t rf;
4505 fputs ("Building __builtin_pow call for (", dump_file);
4506 for (elt = j; elt < vec_len; elt++)
4508 rf = &repeat_factor_vec[elt];
4509 print_generic_expr (dump_file, rf->factor, 0);
4510 if (elt < vec_len - 1)
4511 fputs (" * ", dump_file);
4513 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4516 reassociate_stats.pows_created++;
4518 /* Visit each element of the vector in reverse order (so that
4519 high-occurrence elements are visited first, and within the
4520 same occurrence count, lower-ranked elements are visited
4521 first). Form a linear product of all elements in this order
4522         whose occurrence count is at least that of element J.
4523 Record the SSA name representing the product of each element
4524 with all subsequent elements in the vector. */
4525 if (j == vec_len - 1)
4526 rf1->repr = rf1->factor;
4527 else
4529 for (ii = vec_len - 2; ii >= (int)j; ii--)
4531 tree op1, op2;
4533 rf1 = &repeat_factor_vec[ii];
4534 rf2 = &repeat_factor_vec[ii + 1];
4536 /* Init the last factor's representative to be itself. */
4537 if (!rf2->repr)
4538 rf2->repr = rf2->factor;
4540 op1 = rf1->factor;
4541 op2 = rf2->repr;
4543 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4544 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4545 op1, op2);
4546 gimple_set_location (mul_stmt, gimple_location (stmt));
4547 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4548 rf1->repr = target_ssa;
4550 /* Don't reprocess the multiply we just introduced. */
4551 gimple_set_visited (mul_stmt, true);
4555 /* Form a call to __builtin_powi for the maximum product
4556 just formed, raised to the power obtained earlier. */
4557 rf1 = &repeat_factor_vec[j];
4558 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4559 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4560 build_int_cst (integer_type_node,
4561 power));
4562 gimple_call_set_lhs (pow_stmt, iter_result);
4563 gimple_set_location (pow_stmt, gimple_location (stmt));
4564 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4567 /* If we previously formed at least one other builtin_powi call,
4568 form the product of this one and those others. */
4569 if (result)
4571 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4572 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4573 result, iter_result);
4574 gimple_set_location (mul_stmt, gimple_location (stmt));
4575 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4576 gimple_set_visited (mul_stmt, true);
4577 result = new_result;
4579 else
4580 result = iter_result;
4582 /* Decrement the occurrence count of each element in the product
4583 by the count found above, and remove this many copies of each
4584 factor from OPS. */
4585 for (i = j; i < vec_len; i++)
4587 unsigned k = power;
4588 unsigned n;
4590 rf1 = &repeat_factor_vec[i];
4591 rf1->count -= power;
4593 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4595 if (oe->op == rf1->factor)
4597 if (oe->count <= k)
4599 ops->ordered_remove (n);
4600 k -= oe->count;
4602 if (k == 0)
4603 break;
4605 else
4607 oe->count -= k;
4608 break;
4615 /* At this point all elements in the repeated factor vector have a
4616 remaining occurrence count of 0 or 1, and those with a count of 1
4617 don't have cached representatives. Re-sort the ops vector and
4618 clean up. */
4619 ops->qsort (sort_by_operand_rank);
4620 repeat_factor_vec.release ();
4622 /* Return the final product computed herein. Note that there may
4623 still be some elements with single occurrence count left in OPS;
4624 those will be handled by the normal reassociation logic. */
4625 return result;
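/* Illustrative aside, not part of the pass: a standalone C rendering of
   the five-multiply sequence described in the caching comment above for
   x*x * y*y*y * z*z*z*z, i.e. (x*y*z)^2 * (y*z) * z with the partial
   product y*z computed once and reused.  The function name is
   hypothetical, __builtin_powi is not used so the sketch stays plain C,
   and note the reordering itself is only valid under
   -funsafe-math-optimizations, as required by acceptable_pow_call.  */

static double
toy_powi_cached (double x, double y, double z)
{
  double t1 = y * z;       /* Cached partial product of the base factors.  */
  double t2 = t1 * x;      /* x * y * z.  */
  double t3 = t2 * t2;     /* (x * y * z)^2 == x^2 * y^2 * z^2.  */
  double t4 = t1 * t3;     /* Multiply the cached y * z back in.  */
  return t4 * z;           /* Final z gives x^2 * y^3 * z^4.  */
}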
4628 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4630 static void
4631 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4633 tree rhs1;
4635 if (dump_file && (dump_flags & TDF_DETAILS))
4637 fprintf (dump_file, "Transforming ");
4638 print_gimple_stmt (dump_file, stmt, 0, 0);
4641 rhs1 = gimple_assign_rhs1 (stmt);
4642 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4643 update_stmt (stmt);
4644 remove_visited_stmt_chain (rhs1);
4646 if (dump_file && (dump_flags & TDF_DETAILS))
4648 fprintf (dump_file, " into ");
4649 print_gimple_stmt (dump_file, stmt, 0, 0);
4653 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4655 static void
4656 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4657 tree rhs1, tree rhs2)
4659 if (dump_file && (dump_flags & TDF_DETAILS))
4661 fprintf (dump_file, "Transforming ");
4662 print_gimple_stmt (dump_file, stmt, 0, 0);
4665 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4666 update_stmt (gsi_stmt (*gsi));
4667 remove_visited_stmt_chain (rhs1);
4669 if (dump_file && (dump_flags & TDF_DETAILS))
4671 fprintf (dump_file, " into ");
4672 print_gimple_stmt (dump_file, stmt, 0, 0);
4676 /* Reassociate expressions in basic block BB and its post-dominator
4677    children. */
4679 static void
4680 reassociate_bb (basic_block bb)
4682 gimple_stmt_iterator gsi;
4683 basic_block son;
4684 gimple *stmt = last_stmt (bb);
4686 if (stmt && !gimple_visited_p (stmt))
4687 maybe_optimize_range_tests (stmt);
4689 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4691 stmt = gsi_stmt (gsi);
4693 if (is_gimple_assign (stmt)
4694 && !stmt_could_throw_p (stmt))
4696 tree lhs, rhs1, rhs2;
4697 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4699 /* If this is not a gimple binary expression, there is
4700 nothing for us to do with it. */
4701 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4702 continue;
4704 /* If this was part of an already processed statement,
4705 we don't need to touch it again. */
4706 if (gimple_visited_p (stmt))
4708 /* This statement might have become dead because of previous
4709 reassociations. */
4710 if (has_zero_uses (gimple_get_lhs (stmt)))
4712 reassoc_remove_stmt (&gsi);
4713 release_defs (stmt);
4714              /* We might end up removing the last stmt above, which
4715                 leaves the iterator past the end of the sequence.
4716                 In that case reset it to the last stmt of the block;
4717                 that may itself be the end of the sequence if we
4718                 removed the block's last statement, in which case
4719                 we need to bail out. */
4720 if (gsi_end_p (gsi))
4722 gsi = gsi_last_bb (bb);
4723 if (gsi_end_p (gsi))
4724 break;
4727 continue;
4730 lhs = gimple_assign_lhs (stmt);
4731 rhs1 = gimple_assign_rhs1 (stmt);
4732 rhs2 = gimple_assign_rhs2 (stmt);
4734          /* For operations other than bit ops and min/max we can't associate
4735             all types.  Verify that here. */
4736 if (rhs_code != BIT_IOR_EXPR
4737 && rhs_code != BIT_AND_EXPR
4738 && rhs_code != BIT_XOR_EXPR
4739 && rhs_code != MIN_EXPR
4740 && rhs_code != MAX_EXPR
4741 && (!can_reassociate_p (lhs)
4742 || !can_reassociate_p (rhs1)
4743 || !can_reassociate_p (rhs2)))
4744 continue;
4746 if (associative_tree_code (rhs_code))
4748 auto_vec<operand_entry_t> ops;
4749 tree powi_result = NULL_TREE;
4751 /* There may be no immediate uses left by the time we
4752 get here because we may have eliminated them all. */
4753 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4754 continue;
4756 gimple_set_visited (stmt, true);
4757 linearize_expr_tree (&ops, stmt, true, true);
4758 ops.qsort (sort_by_operand_rank);
4759 optimize_ops_list (rhs_code, &ops);
4760 if (undistribute_ops_list (rhs_code, &ops,
4761 loop_containing_stmt (stmt)))
4763 ops.qsort (sort_by_operand_rank);
4764 optimize_ops_list (rhs_code, &ops);
4767 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4768 optimize_range_tests (rhs_code, &ops);
4770 if (first_pass_instance
4771 && rhs_code == MULT_EXPR
4772 && flag_unsafe_math_optimizations)
4773 powi_result = attempt_builtin_powi (stmt, &ops);
4775 /* If the operand vector is now empty, all operands were
4776 consumed by the __builtin_powi optimization. */
4777 if (ops.length () == 0)
4778 transform_stmt_to_copy (&gsi, stmt, powi_result);
4779 else if (ops.length () == 1)
4781 tree last_op = ops.last ()->op;
4783 if (powi_result)
4784 transform_stmt_to_multiply (&gsi, stmt, last_op,
4785 powi_result);
4786 else
4787 transform_stmt_to_copy (&gsi, stmt, last_op);
4789 else
4791 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4792 int ops_num = ops.length ();
4793 int width = get_reassociation_width (ops_num, rhs_code, mode);
4794 tree new_lhs = lhs;
4796 if (dump_file && (dump_flags & TDF_DETAILS))
4797 fprintf (dump_file,
4798 "Width = %d was chosen for reassociation\n", width);
4800 if (width > 1
4801 && ops.length () > 3)
4802 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4803 width, ops);
4804 else
4806 /* When there are three operands left, we want
4807 to make sure the ones that get the double
4808 binary op are chosen wisely. */
4809 int len = ops.length ();
4810 if (len >= 3)
4811 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4813 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4814 powi_result != NULL);
4817 /* If we combined some repeated factors into a
4818 __builtin_powi call, multiply that result by the
4819 reassociated operands. */
4820 if (powi_result)
4822 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4823 tree type = TREE_TYPE (lhs);
4824 tree target_ssa = make_temp_ssa_name (type, NULL,
4825 "reassocpow");
4826 gimple_set_lhs (lhs_stmt, target_ssa);
4827 update_stmt (lhs_stmt);
4828 if (lhs != new_lhs)
4829 target_ssa = new_lhs;
4830 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4831 powi_result, target_ssa);
4832 gimple_set_location (mul_stmt, gimple_location (stmt));
4833 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4839 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4840 son;
4841 son = next_dom_son (CDI_POST_DOMINATORS, son))
4842 reassociate_bb (son);
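/* Illustrative aside, not part of the pass: when get_reassociation_width
   above returns a width greater than 1, the operand list is rewritten so
   that independent partial sums can execute in parallel.  A minimal
   standalone sketch of the shape difference for four operands follows;
   the function names are hypothetical and wrapping unsigned arithmetic is
   used so the two forms are exactly equivalent.  */

static unsigned int
toy_sum_serial (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
{
  /* Depth-3 dependence chain: each add waits for the previous one.  */
  return ((a + b) + c) + d;
}

static unsigned int
toy_sum_parallel (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
{
  /* Width-2 form: the two inner adds are independent, so the chain is
     only two adds deep.  */
  return (a + b) + (c + d);
}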
4845 /* Add jumps around shifts for range tests turned into bit tests.
4846 For each SSA_NAME VAR we have code like:
4847 VAR = ...; // final stmt of range comparison
4848 // bit test here...;
4849 OTHERVAR = ...; // final stmt of the bit test sequence
4850 RES = VAR | OTHERVAR;
4851 Turn the above into:
4852 VAR = ...;
4853 if (VAR != 0)
4854 goto <l3>;
4855 else
4856 goto <l2>;
4857 <l2>:
4858 // bit test here...;
4859 OTHERVAR = ...;
4860 <l3>:
4861 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4863 static void
4864 branch_fixup (void)
4866 tree var;
4867 unsigned int i;
4869 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4871 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4872 gimple *use_stmt;
4873 use_operand_p use;
4874 bool ok = single_imm_use (var, &use, &use_stmt);
4875 gcc_assert (ok
4876 && is_gimple_assign (use_stmt)
4877 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4878 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4880 basic_block cond_bb = gimple_bb (def_stmt);
4881 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4882 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4884 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4885 gimple *g = gimple_build_cond (NE_EXPR, var,
4886 build_zero_cst (TREE_TYPE (var)),
4887 NULL_TREE, NULL_TREE);
4888 location_t loc = gimple_location (use_stmt);
4889 gimple_set_location (g, loc);
4890 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4892 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4893 etrue->probability = REG_BR_PROB_BASE / 2;
4894 etrue->count = cond_bb->count / 2;
4895 edge efalse = find_edge (cond_bb, then_bb);
4896 efalse->flags = EDGE_FALSE_VALUE;
4897 efalse->probability -= etrue->probability;
4898 efalse->count -= etrue->count;
4899 then_bb->count -= etrue->count;
4901 tree othervar = NULL_TREE;
4902 if (gimple_assign_rhs1 (use_stmt) == var)
4903 othervar = gimple_assign_rhs2 (use_stmt);
4904 else if (gimple_assign_rhs2 (use_stmt) == var)
4905 othervar = gimple_assign_rhs1 (use_stmt);
4906 else
4907 gcc_unreachable ();
4908 tree lhs = gimple_assign_lhs (use_stmt);
4909 gphi *phi = create_phi_node (lhs, merge_bb);
4910 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4911 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4912 gsi = gsi_for_stmt (use_stmt);
4913 gsi_remove (&gsi, true);
4915 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4916 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4918 reassoc_branch_fixups.release ();
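/* Illustrative aside, not part of the pass: at the source level the
   rewrite performed by branch_fixup above corresponds to short-circuiting
   RES = VAR | OTHERVAR so that the bit-test sequence computing OTHERVAR
   is skipped when VAR is already nonzero.  A minimal standalone sketch;
   toy_bit_test_sequence merely stands in for the "bit test here" code and
   both function names are hypothetical.  The rewrite assumes VAR is
   boolean-valued (0 or 1), as it is the final statement of a range
   comparison.  */

static int
toy_bit_test_sequence (unsigned long mask, unsigned long val)
{
  /* Stand-in for the shift-based bit test built for the range test.  */
  return (mask >> (val & 31)) & 1;
}

static int
toy_branch_fixup_shape (int var, unsigned long mask, unsigned long val)
{
  /* Before: res = var | toy_bit_test_sequence (mask, val);
     After:  the bit test only runs when var is zero.  */
  if (var != 0)
    return 1;
  return toy_bit_test_sequence (mask, val);
}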
4921 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4922 void debug_ops_vector (vec<operand_entry_t> ops);
4924 /* Dump the operand entry vector OPS to FILE. */
4926 void
4927 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4929 operand_entry_t oe;
4930 unsigned int i;
4932 FOR_EACH_VEC_ELT (ops, i, oe)
4934 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4935 print_generic_expr (file, oe->op, 0);
4939 /* Dump the operand entry vector OPS to STDERR. */
4941 DEBUG_FUNCTION void
4942 debug_ops_vector (vec<operand_entry_t> ops)
4944 dump_ops_vector (stderr, ops);
4947 static void
4948 do_reassoc (void)
4950 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4951 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4954 /* Initialize the reassociation pass. */
4956 static void
4957 init_reassoc (void)
4959 int i;
4960 long rank = 2;
4961 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4963 /* Find the loops, so that we can prevent moving calculations in
4964 them. */
4965 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4967 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4969 next_operand_entry_id = 0;
4971 /* Reverse RPO (Reverse Post Order) will give us something where
4972 deeper loops come later. */
4973 pre_and_rev_post_order_compute (NULL, bbs, false);
4974 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4975 operand_rank = new hash_map<tree, long>;
4977 /* Give each default definition a distinct rank. This includes
4978 parameters and the static chain. Walk backwards over all
4979 SSA names so that we get proper rank ordering according
4980 to tree_swap_operands_p. */
4981 for (i = num_ssa_names - 1; i > 0; --i)
4983 tree name = ssa_name (i);
4984 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4985 insert_operand_rank (name, ++rank);
4988 /* Set up rank for each BB */
4989 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4990 bb_rank[bbs[i]] = ++rank << 16;
4992 free (bbs);
4993 calculate_dominance_info (CDI_POST_DOMINATORS);
4994 plus_negates = vNULL;
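/* Illustrative aside, not part of the pass: a standalone sketch of the
   rank numbering set up in init_reassoc above.  Default definitions
   receive small consecutive ranks, and each basic block then receives a
   16-bit-shifted band in reverse post order, so values defined in later
   (deeper) blocks compare as higher ranked.  The array names and the
   caller-supplied sizes here are hypothetical.  */

static void
toy_assign_ranks (long *bb_band, unsigned nblocks,
		  long *name_rank, unsigned ndefault_defs)
{
  long rank = 2;
  unsigned i;

  /* One distinct rank per default definition (parameters etc.).  */
  for (i = 0; i < ndefault_defs; i++)
    name_rank[i] = ++rank;

  /* One 2^16-wide band per basic block, in reverse post order.  */
  for (i = 0; i < nblocks; i++)
    bb_band[i] = ++rank << 16;
}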
4997 /* Cleanup after the reassociation pass, and print stats if
4998 requested. */
5000 static void
5001 fini_reassoc (void)
5003 statistics_counter_event (cfun, "Linearized",
5004 reassociate_stats.linearized);
5005 statistics_counter_event (cfun, "Constants eliminated",
5006 reassociate_stats.constants_eliminated);
5007 statistics_counter_event (cfun, "Ops eliminated",
5008 reassociate_stats.ops_eliminated);
5009 statistics_counter_event (cfun, "Statements rewritten",
5010 reassociate_stats.rewritten);
5011 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5012 reassociate_stats.pows_encountered);
5013 statistics_counter_event (cfun, "Built-in powi calls created",
5014 reassociate_stats.pows_created);
5016 delete operand_rank;
5017 operand_entry_pool.release ();
5018 free (bb_rank);
5019 plus_negates.release ();
5020 free_dominance_info (CDI_POST_DOMINATORS);
5021 loop_optimizer_finalize ();
5024 /* Gate and execute functions for Reassociation. */
5026 static unsigned int
5027 execute_reassoc (void)
5029 init_reassoc ();
5031 do_reassoc ();
5032 repropagate_negates ();
5033 branch_fixup ();
5035 fini_reassoc ();
5036 return 0;
5039 namespace {
5041 const pass_data pass_data_reassoc =
5043 GIMPLE_PASS, /* type */
5044 "reassoc", /* name */
5045 OPTGROUP_NONE, /* optinfo_flags */
5046 TV_TREE_REASSOC, /* tv_id */
5047 ( PROP_cfg | PROP_ssa ), /* properties_required */
5048 0, /* properties_provided */
5049 0, /* properties_destroyed */
5050 0, /* todo_flags_start */
5051 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5054 class pass_reassoc : public gimple_opt_pass
5056 public:
5057 pass_reassoc (gcc::context *ctxt)
5058 : gimple_opt_pass (pass_data_reassoc, ctxt)
5061 /* opt_pass methods: */
5062 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5063 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5064 virtual unsigned int execute (function *) { return execute_reassoc (); }
5066 }; // class pass_reassoc
5068 } // anon namespace
5070 gimple_opt_pass *
5071 make_pass_reassoc (gcc::context *ctxt)
5073 return new pass_reassoc (ctxt);