testsuite: Correct vec-rlmi-rlnm.c testsuite expected result
[official-gcc.git] / gcc / tree-ssa-reassoc.c
bloba2ca1713d4b50abad1700c9ea92bb814c52e7a4d
1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
/* Statistics gathered during one run of the pass, dumped at the end.  */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 unsigned int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
217 all basic blocks the CFG should be adjusted - basic blocks
218 split right after that SSA_NAME's definition statement and before
219 the only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 class loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAME's have the rank of the expression they are the result
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advatage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 /* Sort -1.0 and 1.0 constants last, while in some cases
487 const_binop can't optimize some inexact operations, multiplication
488 by -1.0 or 1.0 can be always merged with others. */
489 if (real_onep (t) || real_minus_onep (t))
490 return FLOAT_ONE_CONST_TYPE;
491 return FLOAT_CONST_TYPE;
493 else
494 return OTHER_CONST_TYPE;
497 /* qsort comparison function to sort operand entries PA and PB by rank
498 so that the sorted array is ordered by rank in decreasing order. */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
502 const operand_entry *oea = *(const operand_entry *const *)pa;
503 const operand_entry *oeb = *(const operand_entry *const *)pb;
505 if (oeb->rank != oea->rank)
506 return oeb->rank > oea->rank ? 1 : -1;
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
511 if (oea->rank == 0)
513 if (constant_type (oeb->op) != constant_type (oea->op))
514 return constant_type (oea->op) - constant_type (oeb->op);
515 else
516 /* To make sorting result stable, we use unique IDs to determine
517 order. */
518 return oeb->id > oea->id ? 1 : -1;
521 if (TREE_CODE (oea->op) != SSA_NAME)
523 if (TREE_CODE (oeb->op) != SSA_NAME)
524 return oeb->id > oea->id ? 1 : -1;
525 else
526 return 1;
528 else if (TREE_CODE (oeb->op) != SSA_NAME)
529 return -1;
531 /* Lastly, make sure the versions that are the same go next to each
532 other. */
533 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
536 versions of removed SSA_NAMEs, so if possible, prefer to sort
537 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 See PR60418. */
539 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541 basic_block bba = gimple_bb (stmta);
542 basic_block bbb = gimple_bb (stmtb);
543 if (bbb != bba)
545 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 but the other might not. */
547 if (!bba)
548 return 1;
549 if (!bbb)
550 return -1;
551 /* If neither is, compare bb_rank. */
552 if (bb_rank[bbb->index] != bb_rank[bba->index])
553 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
556 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558 if (da != db)
559 return da ? 1 : -1;
561 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
564 return oeb->id > oea->id ? 1 : -1;
567 /* Add an operand entry to *OPS for the tree operand OP. */
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 operand_entry *oe = operand_entry_pool.allocate ();
574 oe->op = op;
575 oe->rank = get_rank (op);
576 oe->id = next_operand_entry_id++;
577 oe->count = 1;
578 oe->stmt_to_insert = stmt_to_insert;
579 ops->safe_push (oe);
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583 count REPEAT. */
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
587 HOST_WIDE_INT repeat)
589 operand_entry *oe = operand_entry_pool.allocate ();
591 oe->op = op;
592 oe->rank = get_rank (op);
593 oe->id = next_operand_entry_id++;
594 oe->count = repeat;
595 oe->stmt_to_insert = NULL;
596 ops->safe_push (oe);
598 reassociate_stats.pows_encountered++;
601 /* Return true if STMT is reassociable operation containing a binary
602 operation with tree code CODE, and is inside LOOP. */
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
607 basic_block bb = gimple_bb (stmt);
609 if (gimple_bb (stmt) == NULL)
610 return false;
612 if (!flow_bb_inside_loop_p (loop, bb))
613 return false;
615 if (is_gimple_assign (stmt)
616 && gimple_assign_rhs_code (stmt) == code
617 && has_single_use (gimple_assign_lhs (stmt)))
619 tree rhs1 = gimple_assign_rhs1 (stmt);
620 tree rhs2 = gimple_assign_rhs2 (stmt);
621 if (TREE_CODE (rhs1) == SSA_NAME
622 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 return false;
624 if (rhs2
625 && TREE_CODE (rhs2) == SSA_NAME
626 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 return false;
628 return true;
631 return false;
635 /* Return true if STMT is a nop-conversion. */
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
640 if (gassign *ass = dyn_cast <gassign *> (stmt))
642 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 TREE_TYPE (gimple_assign_rhs1 (ass))))
645 return true;
647 return false;
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651 operand of the negate operation. Otherwise, return NULL. */
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
656 gimple *stmt = SSA_NAME_DEF_STMT (name);
658 /* Look through nop conversions (sign changes). */
659 if (gimple_nop_conversion_p (stmt)
660 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 if (!is_gimple_assign (stmt))
664 return NULL_TREE;
666 if (gimple_assign_rhs_code (stmt) == opcode)
667 return gimple_assign_rhs1 (stmt);
668 return NULL_TREE;
671 /* Return true if OP1 and OP2 have the same value if casted to either type. */
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
676 if (op1 == op2)
677 return true;
679 tree orig_op1 = op1;
680 if (TREE_CODE (op1) == SSA_NAME)
682 gimple *stmt = SSA_NAME_DEF_STMT (op1);
683 if (gimple_nop_conversion_p (stmt))
685 op1 = gimple_assign_rhs1 (stmt);
686 if (op1 == op2)
687 return true;
691 if (TREE_CODE (op2) == SSA_NAME)
693 gimple *stmt = SSA_NAME_DEF_STMT (op2);
694 if (gimple_nop_conversion_p (stmt))
696 op2 = gimple_assign_rhs1 (stmt);
697 if (op1 == op2
698 || orig_op1 == op2)
699 return true;
703 return false;
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708 eliminate through equivalences, do so, remove them from OPS, and
709 return true. Otherwise, return false. */
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 vec<operand_entry *> *ops,
714 bool *all_done,
715 unsigned int i,
716 operand_entry *curr,
717 operand_entry *last)
720 /* If we have two of the same op, and the opcode is & |, min, or max,
721 we can eliminate one of them.
722 If we have two of the same op, and the opcode is ^, we can
723 eliminate both of them. */
725 if (last && last->op == curr->op)
727 switch (opcode)
729 case MAX_EXPR:
730 case MIN_EXPR:
731 case BIT_IOR_EXPR:
732 case BIT_AND_EXPR:
733 if (dump_file && (dump_flags & TDF_DETAILS))
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, curr->op);
737 fprintf (dump_file, " [&|minmax] ");
738 print_generic_expr (dump_file, last->op);
739 fprintf (dump_file, " -> ");
740 print_generic_stmt (dump_file, last->op);
743 ops->ordered_remove (i);
744 reassociate_stats.ops_eliminated ++;
746 return true;
748 case BIT_XOR_EXPR:
749 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "Equivalence: ");
752 print_generic_expr (dump_file, curr->op);
753 fprintf (dump_file, " ^ ");
754 print_generic_expr (dump_file, last->op);
755 fprintf (dump_file, " -> nothing\n");
758 reassociate_stats.ops_eliminated += 2;
760 if (ops->length () == 2)
762 ops->truncate (0);
763 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 *all_done = true;
766 else
768 ops->ordered_remove (i-1);
769 ops->ordered_remove (i-1);
772 return true;
774 default:
775 break;
778 return false;
781 static vec<tree> plus_negates;
783 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
784 expression, look in OPS for a corresponding positive operation to cancel
785 it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
787 return false. */
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 vec<operand_entry *> *ops,
792 unsigned int currindex,
793 operand_entry *curr)
795 tree negateop;
796 tree notop;
797 unsigned int i;
798 operand_entry *oe;
800 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
803 negateop = get_unary_op (curr->op, NEGATE_EXPR);
804 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805 if (negateop == NULL_TREE && notop == NULL_TREE)
806 return false;
808 /* Any non-negated version will have a rank that is one less than
809 the current rank. So once we hit those ranks, if we don't find
810 one, we can stop. */
812 for (i = currindex + 1;
813 ops->iterate (i, &oe)
814 && oe->rank >= curr->rank - 1 ;
815 i++)
817 if (negateop
818 && ops_equal_values_p (oe->op, negateop))
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, negateop);
824 fprintf (dump_file, " + -");
825 print_generic_expr (dump_file, oe->op);
826 fprintf (dump_file, " -> 0\n");
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
834 return true;
836 else if (notop
837 && ops_equal_values_p (oe->op, notop))
839 tree op_type = TREE_TYPE (oe->op);
841 if (dump_file && (dump_flags & TDF_DETAILS))
843 fprintf (dump_file, "Equivalence: ");
844 print_generic_expr (dump_file, notop);
845 fprintf (dump_file, " + ~");
846 print_generic_expr (dump_file, oe->op);
847 fprintf (dump_file, " -> -1\n");
850 ops->ordered_remove (i);
851 add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 ops->ordered_remove (currindex);
853 reassociate_stats.ops_eliminated ++;
855 return true;
859 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860 save it for later inspection in repropagate_negates(). */
861 if (negateop != NULL_TREE
862 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863 plus_negates.safe_push (curr->op);
865 return false;
868 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
869 bitwise not expression, look in OPS for a corresponding operand to
870 cancel it out. If we find one, remove the other from OPS, replace
871 OPS[CURRINDEX] with 0, and return true. Otherwise, return
872 false. */
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 vec<operand_entry *> *ops,
877 unsigned int currindex,
878 operand_entry *curr)
880 tree notop;
881 unsigned int i;
882 operand_entry *oe;
884 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885 || TREE_CODE (curr->op) != SSA_NAME)
886 return false;
888 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889 if (notop == NULL_TREE)
890 return false;
892 /* Any non-not version will have a rank that is one less than
893 the current rank. So once we hit those ranks, if we don't find
894 one, we can stop. */
896 for (i = currindex + 1;
897 ops->iterate (i, &oe)
898 && oe->rank >= curr->rank - 1;
899 i++)
901 if (oe->op == notop)
903 if (dump_file && (dump_flags & TDF_DETAILS))
905 fprintf (dump_file, "Equivalence: ");
906 print_generic_expr (dump_file, notop);
907 if (opcode == BIT_AND_EXPR)
908 fprintf (dump_file, " & ~");
909 else if (opcode == BIT_IOR_EXPR)
910 fprintf (dump_file, " | ~");
911 print_generic_expr (dump_file, oe->op);
912 if (opcode == BIT_AND_EXPR)
913 fprintf (dump_file, " -> 0\n");
914 else if (opcode == BIT_IOR_EXPR)
915 fprintf (dump_file, " -> -1\n");
918 if (opcode == BIT_AND_EXPR)
919 oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 else if (opcode == BIT_IOR_EXPR)
921 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 reassociate_stats.ops_eliminated += ops->length () - 1;
924 ops->truncate (0);
925 ops->quick_push (oe);
926 return true;
930 return false;
933 /* Use constant value that may be present in OPS to try to eliminate
934 operands. Note that this function is only really used when we've
935 eliminated ops for other reasons, or merged constants. Across
936 single statements, fold already does all of this, plus more. There
937 is little point in duplicating logic, so I've only included the
938 identities that I could ever construct testcases to trigger. */
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 vec<operand_entry *> *ops)
944 operand_entry *oelast = ops->last ();
945 tree type = TREE_TYPE (oelast->op);
947 if (oelast->rank == 0
948 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950 switch (opcode)
952 case BIT_AND_EXPR:
953 if (integer_zerop (oelast->op))
955 if (ops->length () != 1)
957 if (dump_file && (dump_flags & TDF_DETAILS))
958 fprintf (dump_file, "Found & 0, removing all other ops\n");
960 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (0);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_all_onesp (oelast->op))
969 if (ops->length () != 1)
971 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Found & -1, removing\n");
973 ops->pop ();
974 reassociate_stats.ops_eliminated++;
977 break;
978 case BIT_IOR_EXPR:
979 if (integer_all_onesp (oelast->op))
981 if (ops->length () != 1)
983 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Found | -1, removing all other ops\n");
986 reassociate_stats.ops_eliminated += ops->length () - 1;
988 ops->truncate (0);
989 ops->quick_push (oelast);
990 return;
993 else if (integer_zerop (oelast->op))
995 if (ops->length () != 1)
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Found | 0, removing\n");
999 ops->pop ();
1000 reassociate_stats.ops_eliminated++;
1003 break;
1004 case MULT_EXPR:
1005 if (integer_zerop (oelast->op)
1006 || (FLOAT_TYPE_P (type)
1007 && !HONOR_NANS (type)
1008 && !HONOR_SIGNED_ZEROS (type)
1009 && real_zerop (oelast->op)))
1011 if (ops->length () != 1)
1013 if (dump_file && (dump_flags & TDF_DETAILS))
1014 fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 reassociate_stats.ops_eliminated += ops->length () - 1;
1017 ops->truncate (0);
1018 ops->quick_push (oelast);
1019 return;
1022 else if (integer_onep (oelast->op)
1023 || (FLOAT_TYPE_P (type)
1024 && !HONOR_SNANS (type)
1025 && real_onep (oelast->op)))
1027 if (ops->length () != 1)
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found * 1, removing\n");
1031 ops->pop ();
1032 reassociate_stats.ops_eliminated++;
1033 return;
1036 break;
1037 case BIT_XOR_EXPR:
1038 case PLUS_EXPR:
1039 case MINUS_EXPR:
1040 if (integer_zerop (oelast->op)
1041 || (FLOAT_TYPE_P (type)
1042 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 && fold_real_zero_addition_p (type, oelast->op,
1044 opcode == MINUS_EXPR)))
1046 if (ops->length () != 1)
1048 if (dump_file && (dump_flags & TDF_DETAILS))
1049 fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 ops->pop ();
1051 reassociate_stats.ops_eliminated++;
1052 return;
1055 break;
1056 default:
1057 break;
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 bool, bool);
1066 /* Structure for tracking and counting operands. */
1067 struct oecount {
1068 unsigned int cnt;
1069 unsigned int id;
1070 enum tree_code oecode;
1071 tree op;
1075 /* The heap for the oecount hashtable and the sorted list of operands. */
1076 static vec<oecount> cvec;
1079 /* Oecount hashtable helpers. */
1081 struct oecount_hasher : int_hash <int, 0, 1>
1083 static inline hashval_t hash (int);
1084 static inline bool equal (int, int);
1087 /* Hash function for oecount. */
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1092 const oecount *c = &cvec[p - 42];
1093 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1096 /* Comparison function for oecount. */
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1101 const oecount *c1 = &cvec[p1 - 42];
1102 const oecount *c2 = &cvec[p2 - 42];
1103 return c1->oecode == c2->oecode && c1->op == c2->op;
1106 /* Comparison function for qsort sorting oecount elements by count. */
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1111 const oecount *c1 = (const oecount *)p1;
1112 const oecount *c2 = (const oecount *)p2;
1113 if (c1->cnt != c2->cnt)
1114 return c1->cnt > c2->cnt ? 1 : -1;
1115 else
1116 /* If counts are identical, use unique IDs to stabilize qsort. */
1117 return c1->id > c2->id ? 1 : -1;
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121 to some exponent. */
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1126 if (!is_gimple_call (stmt))
1127 return false;
1129 switch (gimple_call_combined_fn (stmt))
1131 CASE_CFN_POW:
1132 CASE_CFN_POWI:
1133 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 default:
1136 return false;
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141 in place and return the result.  Assumes that stmt_is_power_of_op
1142 was previously called for STMT and returned TRUE.  */
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1147 REAL_VALUE_TYPE c, cint;
1148 HOST_WIDE_INT power;
1149 tree arg1;
1151 switch (gimple_call_combined_fn (stmt))
1153 CASE_CFN_POW:
  /* pow: the exponent is a REAL_CST.  Convert it to an integer,
     subtract one, and write the decremented value back as a real.  */
1154 arg1 = gimple_call_arg (stmt, 1);
1155 c = TREE_REAL_CST (arg1);
1156 power = real_to_integer (&c) - 1;
1157 real_from_integer (&cint, VOIDmode, power, SIGNED);
1158 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159 return power;
1161 CASE_CFN_POWI:
  /* powi: the exponent is an INTEGER_CST; simply subtract one.  */
1162 arg1 = gimple_call_arg (stmt, 1);
1163 power = TREE_INT_CST_LOW (arg1) - 1;
1164 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165 return power;
1167 default:
  /* Guaranteed unreachable by the stmt_is_power_of_op precondition.  */
1168 gcc_unreachable ();
1172 /* Give STMT a fresh LHS SSA name and rewrite all uses of the old
1173 LHS to it; debug uses are instead rewritten to a new
   DEBUG_EXPR_DECL bound to "new_lhs OPCODE OP".  Return the new
   SSA name.  */
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 gimple *use_stmt;
1179 use_operand_p use;
1180 imm_use_iterator iter;
1181 tree new_lhs, new_debug_lhs = NULL_TREE;
1182 tree lhs = gimple_get_lhs (stmt);
1184 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185 gimple_set_lhs (stmt, new_lhs);
1187 /* Also need to update GIMPLE_DEBUGs.  */
1188 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190 tree repl = new_lhs;
1191 if (is_gimple_debug (use_stmt))
1193 if (new_debug_lhs == NULL_TREE)
  /* Lazily create one shared debug decl describing the old value
     as "new_lhs OPCODE op" and bind it right after STMT.  */
1195 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 gdebug *def_temp
1197 = gimple_build_debug_bind (new_debug_lhs,
1198 build2 (opcode, TREE_TYPE (lhs),
1199 new_lhs, op),
1200 stmt);
1201 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
  /* Reuse STMT's UID so the UID-based dominance test stays valid.  */
1204 gimple_set_uid (def_temp, gimple_uid (stmt));
1205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 repl = new_debug_lhs;
1210 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 SET_USE (use, repl);
1212 update_stmt (use_stmt);
1214 return new_lhs;
1217 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1218 uses with new SSAs. Also do this for the stmt that defines DEF
1219 if *DEF is not OP. */
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 vec<gimple *> &stmts_to_fix)
1225 unsigned i;
1226 gimple *stmt;
1228 if (*def != op
1229 && TREE_CODE (*def) == SSA_NAME
1230 && (stmt = SSA_NAME_DEF_STMT (*def))
1231 && gimple_code (stmt) != GIMPLE_NOP)
1232 *def = make_new_ssa_for_def (stmt, opcode, op);
1234 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235 make_new_ssa_for_def (stmt, opcode, op);
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239 with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1240 replace *DEF with OP as well.  */
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 tree lhs;
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 gimple_stmt_iterator gsi;
  /* STMT may be either a call (a __builtin_pow*) or an assignment.  */
1250 if (is_gimple_call (stmt))
1251 lhs = gimple_call_lhs (stmt);
1252 else
1253 lhs = gimple_assign_lhs (stmt);
  /* The caller guarantees exactly one use of LHS; locate it.  */
1255 gcc_assert (has_single_use (lhs));
1256 single_imm_use (lhs, &use, &use_stmt);
1257 if (lhs == *def)
1258 *def = op;
1259 SET_USE (use, op);
  /* A non-SSA replacement (e.g. a constant) requires an explicit
     re-scan of the use statement's operands.  */
1260 if (TREE_CODE (op) != SSA_NAME)
1261 update_stmt (use_stmt);
  /* STMT is now dead: unlink its virtual operands and remove it.  */
1262 gsi = gsi_for_stmt (stmt);
1263 unlink_stmt_vdef (stmt);
1264 reassoc_remove_stmt (&gsi);
1265 release_defs (stmt);
1268 /* Walks the linear chain with result *DEF searching for an operation
1269 with operand OP and code OPCODE removing that from the chain.  *DEF
1270 is updated if there is only one operand but no operation left. */
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 tree orig_def = *def;
1276 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277 /* PR72835 - Record the stmt chain that has to be updated such that
1278 we dont use the same LHS when the values computed are different. */
1279 auto_vec<gimple *, 64> stmts_to_fix;
1283 tree name;
1285 if (opcode == MULT_EXPR)
1287 if (stmt_is_power_of_op (stmt, op))
1289 if (decrement_power (stmt) == 1)
  /* The exponent dropped to 1: the pow call itself disappears and
     its single use gets OP directly.  */
1291 if (stmts_to_fix.length () > 0)
1292 stmts_to_fix.pop ();
1293 propagate_op_to_single_use (op, stmt, def);
1295 break;
1297 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 if (gimple_assign_rhs1 (stmt) == op)
  /* Removing OP from "-OP" leaves a factor of -1.  */
1301 tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 if (stmts_to_fix.length () > 0)
1303 stmts_to_fix.pop ();
1304 propagate_op_to_single_use (cst, stmt, def);
1305 break;
1307 else if (integer_minus_onep (op)
1308 || real_minus_onep (op))
  /* Removing a factor of -1 from a negate: strip the negation
     in place by turning it into a plain copy.  */
1310 gimple_assign_set_rhs_code
1311 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 break;
1317 name = gimple_assign_rhs1 (stmt);
1319 /* If this is the operation we look for and one of the operands
1320 is ours simply propagate the other operand into the stmts
1321 single use. */
1322 if (gimple_assign_rhs_code (stmt) == opcode
1323 && (name == op
1324 || gimple_assign_rhs2 (stmt) == op))
1326 if (name == op)
1327 name = gimple_assign_rhs2 (stmt);
1328 if (stmts_to_fix.length () > 0)
1329 stmts_to_fix.pop ();
1330 propagate_op_to_single_use (name, stmt, def);
1331 break;
1334 /* We might have a multiply of two __builtin_pow* calls, and
1335 the operand might be hiding in the rightmost one.  Likewise
1336 this can happen for a negate. */
1337 if (opcode == MULT_EXPR
1338 && gimple_assign_rhs_code (stmt) == opcode
1339 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 && has_single_use (gimple_assign_rhs2 (stmt)))
1342 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 if (stmt_is_power_of_op (stmt2, op))
1345 if (decrement_power (stmt2) == 1)
1346 propagate_op_to_single_use (op, stmt2, def);
1347 else
1348 stmts_to_fix.safe_push (stmt2);
1349 break;
1351 else if (is_gimple_assign (stmt2)
1352 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 if (gimple_assign_rhs1 (stmt2) == op)
1356 tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 propagate_op_to_single_use (cst, stmt2, def);
1358 break;
1360 else if (integer_minus_onep (op)
1361 || real_minus_onep (op))
1363 stmts_to_fix.safe_push (stmt2);
1364 gimple_assign_set_rhs_code
1365 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 break;
1371 /* Continue walking the chain. */
1372 gcc_assert (name != op
1373 && TREE_CODE (name) == SSA_NAME);
1374 stmt = SSA_NAME_DEF_STMT (name);
1375 stmts_to_fix.safe_push (stmt);
1377 while (1);
  /* PR72835: give each touched stmt (and *DEF's definition if *DEF
     was not replaced) a fresh LHS so differing computed values never
     share one SSA name.  */
1379 if (stmts_to_fix.length () > 0 || *def == orig_def)
1380 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1383 /* Returns true if statement S1 dominates statement S2. Like
1384 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1392 SSA_NAME. Assume it lives at the beginning of function and
1393 thus dominates everything. */
1394 if (!bb1 || s1 == s2)
1395 return true;
1397 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1398 if (!bb2)
1399 return false;
1401 if (bb1 == bb2)
1403 /* PHIs in the same basic block are assumed to be
1404 executed all in parallel, if only one stmt is a PHI,
1405 it dominates the other stmt in the same basic block. */
1406 if (gimple_code (s1) == GIMPLE_PHI)
1407 return true;
1409 if (gimple_code (s2) == GIMPLE_PHI)
1410 return false;
1412 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 if (gimple_uid (s1) < gimple_uid (s2))
1415 return true;
1417 if (gimple_uid (s1) > gimple_uid (s2))
1418 return false;
1420 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421 unsigned int uid = gimple_uid (s1);
1422 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 gimple *s = gsi_stmt (gsi);
1425 if (gimple_uid (s) != uid)
1426 break;
1427 if (s == s2)
1428 return true;
1431 return false;
1434 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1437 /* Insert STMT after INSERT_POINT.  */
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 gimple_stmt_iterator gsi;
1443 basic_block bb;
1445 if (gimple_code (insert_point) == GIMPLE_PHI)
  /* "After" a PHI means at the start of the PHI's block.  */
1446 bb = gimple_bb (insert_point);
1447 else if (!stmt_ends_bb_p (insert_point))
  /* Common case: insert directly after INSERT_POINT, reusing its UID
     so reassoc_stmt_dominates_stmt_p keeps working.  */
1449 gsi = gsi_for_stmt (insert_point);
1450 gimple_set_uid (stmt, gimple_uid (insert_point));
1451 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452 return;
1454 else
1455 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456 thus if it must end a basic block, it should be a call that can
1457 throw, or some assignment that can throw.  If it throws, the LHS
1458 of it will not be initialized though, so only valid places using
1459 the SSA_NAME should be dominated by the fallthru edge. */
1460 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  /* Shared path for the PHI and fallthru cases: insert at the start
     of BB, after any labels, picking a suitable UID.  */
1461 gsi = gsi_after_labels (bb);
1462 if (gsi_end_p (gsi))
  /* No statement after the labels: derive the UID from the block's
     last statement, or fall back to 1 for an empty block.  */
1464 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465 gimple_set_uid (stmt,
1466 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468 else
1469 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474 the result.  Places the statement after the definition of either
1475 OP1 or OP2.  Returns the new statement.  */
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 gimple *op1def = NULL, *op2def = NULL;
1481 gimple_stmt_iterator gsi;
1482 tree op;
1483 gassign *sum;
1485 /* Create the addition statement.  */
1486 op = make_ssa_name (type);
1487 sum = gimple_build_assign (op, opcode, op1, op2);
1489 /* Find an insertion place and insert.  */
1490 if (TREE_CODE (op1) == SSA_NAME)
1491 op1def = SSA_NAME_DEF_STMT (op1);
1492 if (TREE_CODE (op2) == SSA_NAME)
1493 op2def = SSA_NAME_DEF_STMT (op2);
1494 if ((!op1def || gimple_nop_p (op1def))
1495 && (!op2def || gimple_nop_p (op2def)))
  /* Neither operand has a real defining statement (constants or
     default defs): place the new statement at function entry.  */
1497 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498 if (gsi_end_p (gsi))
  /* Empty entry successor after labels: inherit a UID from its
     last statement, or use 1.  */
1500 gimple_stmt_iterator gsi2
1501 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1502 gimple_set_uid (sum,
1503 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 else
1506 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1507 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509 else
1511 gimple *insert_point;
  /* Insert after the later of the two defining statements so both
     operands are available at the insertion point.  */
1512 if ((!op1def || gimple_nop_p (op1def))
1513 || (op2def && !gimple_nop_p (op2def)
1514 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1515 insert_point = op2def;
1516 else
1517 insert_point = op1def;
1518 insert_stmt_after (sum, insert_point);
1520 update_stmt (sum);
1522 return sum;
1525 /* Perform un-distribution of divisions and multiplications.
1526 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1527 to (A + B) / X for real X.
1529 The algorithm is organized as follows.
1531 - First we walk the addition chain *OPS looking for summands that
1532 are defined by a multiplication or a real division.  This results
1533 in the candidates bitmap with relevant indices into *OPS.
1535 - Second we build the chains of multiplications or divisions for
1536 these candidates, counting the number of occurrences of (operand, code)
1537 pairs in all of the candidates chains.
1539 - Third we sort the (operand, code) pairs by number of occurrence and
1540 process them starting with the pair with the most uses.
1542 * For each such pair we walk the candidates again to build a
1543 second candidate bitmap noting all multiplication/division chains
1544 that have at least one occurrence of (operand, code).
1546 * We build an alternate addition chain only covering these
1547 candidates with one (operand, code) operation removed from their
1548 multiplication/division chain.
1550 * The first candidate gets replaced by the alternate addition chain
1551 multiplied/divided by the operand.
1553 * All candidate chains get disabled for further processing and
1554 processing of (operand, code) pairs continues.
1556 The alternate addition chains built are re-processed by the main
1557 reassociation algorithm which allows optimizing a * x * y + b * y * x
1558 to (a + b ) * x * y in one invocation of the reassociation pass. */
1560 static bool
1561 undistribute_ops_list (enum tree_code opcode,
1562 vec<operand_entry *> *ops, class loop *loop)
1564 unsigned int length = ops->length ();
1565 operand_entry *oe1;
1566 unsigned i, j;
1567 unsigned nr_candidates, nr_candidates2;
1568 sbitmap_iterator sbi0;
1569 vec<operand_entry *> *subops;
1570 bool changed = false;
1571 unsigned int next_oecount_id = 0;
  /* Only addition chains are handled, and only when there are at
     least two summands to combine.  */
1573 if (length <= 1
1574 || opcode != PLUS_EXPR)
1575 return false;
1577 /* Build a list of candidates to process.  */
1578 auto_sbitmap candidates (length);
1579 bitmap_clear (candidates);
1580 nr_candidates = 0;
1581 FOR_EACH_VEC_ELT (*ops, i, oe1)
1583 enum tree_code dcode;
1584 gimple *oe1def;
1586 if (TREE_CODE (oe1->op) != SSA_NAME)
1587 continue;
1588 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1589 if (!is_gimple_assign (oe1def))
1590 continue;
1591 dcode = gimple_assign_rhs_code (oe1def);
1592 if ((dcode != MULT_EXPR
1593 && dcode != RDIV_EXPR)
1594 || !is_reassociable_op (oe1def, dcode, loop))
1595 continue;
1597 bitmap_set_bit (candidates, i);
1598 nr_candidates++;
1601 if (nr_candidates < 2)
1602 return false;
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, "searching for un-distribute opportunities ");
1607 print_generic_expr (dump_file,
1608 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1609 fprintf (dump_file, " %d\n", nr_candidates);
1612 /* Build linearized sub-operand lists and the counting table.  */
1613 cvec.create (0);
1615 hash_table<oecount_hasher> ctable (15);
1617 /* ??? Macro arguments cannot have multi-argument template types in
1618 them.  This typedef is needed to workaround that limitation.  */
1619 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1620 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1621 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623 gimple *oedef;
1624 enum tree_code oecode;
1625 unsigned j;
1627 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1628 oecode = gimple_assign_rhs_code (oedef);
1629 linearize_expr_tree (&subops[i], oedef,
1630 associative_tree_code (oecode), false);
1632 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 oecount c;
1635 int *slot;
1636 int idx;
1637 c.oecode = oecode;
1638 c.cnt = 1;
1639 c.id = next_oecount_id++;
1640 c.op = oe1->op;
1641 cvec.safe_push (c);
  /* Key is the 1-based CVEC index biased by +41, i.e. slot I is
     stored as I + 42, keeping 0 and 1 free for the hash table's
     empty/deleted sentinels.  */
1642 idx = cvec.length () + 41;
1643 slot = ctable.find_slot (idx, INSERT);
1644 if (!*slot)
1646 *slot = idx;
1648 else
  /* Pair already seen: drop the freshly pushed copy and bump the
     existing entry's count.  */
1650 cvec.pop ();
1651 cvec[*slot - 42].cnt++;
1656 /* Sort the counting table.  */
1657 cvec.qsort (oecount_cmp);
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 oecount *c;
1662 fprintf (dump_file, "Candidates:\n");
1663 FOR_EACH_VEC_ELT (cvec, j, c)
1665 fprintf (dump_file, "  %u %s: ", c->cnt,
1666 c->oecode == MULT_EXPR
1667 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1668 print_generic_expr (dump_file, c->op);
1669 fprintf (dump_file, "\n");
1673 /* Process the (operand, code) pairs in order of most occurrence.  */
1674 auto_sbitmap candidates2 (length);
1675 while (!cvec.is_empty ())
  /* CVEC is sorted ascending, so its last element has the highest
     count; once that drops below 2 nothing can be undistributed.  */
1677 oecount *c = &cvec.last ();
1678 if (c->cnt < 2)
1679 break;
1681 /* Now collect the operands in the outer chain that contain
1682 the common operand in their inner chain.  */
1683 bitmap_clear (candidates2);
1684 nr_candidates2 = 0;
1685 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 gimple *oedef;
1688 enum tree_code oecode;
1689 unsigned j;
1690 tree op = (*ops)[i]->op;
1692 /* If we undistributed in this chain already this may be
1693 a constant.  */
1694 if (TREE_CODE (op) != SSA_NAME)
1695 continue;
1697 oedef = SSA_NAME_DEF_STMT (op);
1698 oecode = gimple_assign_rhs_code (oedef);
1699 if (oecode != c->oecode)
1700 continue;
1702 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 if (oe1->op == c->op)
1706 bitmap_set_bit (candidates2, i);
1707 ++nr_candidates2;
1708 break;
1713 if (nr_candidates2 >= 2)
1715 operand_entry *oe1, *oe2;
1716 gimple *prod;
1717 int first = bitmap_first_set_bit (candidates2);
1719 /* Build the new addition chain.  */
1720 oe1 = (*ops)[first];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Building (");
1724 print_generic_expr (dump_file, oe1->op);
1726 zero_one_operation (&oe1->op, c->oecode, c->op);
1727 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 gimple *sum;
1730 oe2 = (*ops)[i];
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1733 fprintf (dump_file, " + ");
1734 print_generic_expr (dump_file, oe2->op);
  /* Strip the common factor from OE2's chain and fold its
     remainder into the running sum; OE2 itself becomes a
     neutral zero in the outer chain.  */
1736 zero_one_operation (&oe2->op, c->oecode, c->op);
1737 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1738 oe1->op, oe2->op, opcode);
1739 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1740 oe2->rank = 0;
1741 oe1->op = gimple_get_lhs (sum);
1744 /* Apply the multiplication/division.  */
1745 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1746 oe1->op, c->op, c->oecode);
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1750 print_generic_expr (dump_file, c->op);
1751 fprintf (dump_file, "\n");
1754 /* Record it in the addition chain and disable further
1755 undistribution with this op.  */
1756 oe1->op = gimple_assign_lhs (prod);
1757 oe1->rank = get_rank (oe1->op);
1758 subops[first].release ();
1760 changed = true;
1763 cvec.pop ();
1766 for (i = 0; i < ops->length (); ++i)
1767 subops[i].release ();
1768 free (subops);
1769 cvec.release ();
1771 return changed;
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1775 first: element index for each relevant BIT_FIELD_REF.
1776 second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
1777 typedef std::pair<unsigned, unsigned> v_info_elem;
1778 struct v_info {
  /* The vector type shared by all BIT_FIELD_REFs on this SSA_NAME.  */
1779 tree vec_type;
  /* All (element index, ops index) pairs collected for it.  */
1780 auto_vec<v_info_elem, 32> vec;
1782 typedef v_info *v_info_ptr;
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1785 static int
1786 sort_by_mach_mode (const void *p_i, const void *p_j)
1788 const tree tr1 = *((const tree *) p_i);
1789 const tree tr2 = *((const tree *) p_j);
1790 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1791 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1792 if (mode1 > mode2)
1793 return 1;
1794 else if (mode1 < mode2)
1795 return -1;
1796 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1797 return -1;
1798 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1799 return 1;
1800 return 0;
1803 /* Cleanup hash map for VECTOR information. */
1804 static void
1805 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1807 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1808 it != info_map.end (); ++it)
1810 v_info_ptr info = (*it).second;
1811 delete info;
1812 (*it).second = NULL;
1816 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1817 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1818 is transformed to
1819 Vs = (V1 + V2 + ... + Vn)
1820 Vs[0] + Vs[1] + ... + Vs[k]
1822 The basic steps are listed below:
1824 1) Check the addition chain *OPS by looking those summands coming from
1825 VECTOR bit_field_ref on VECTOR type. Put the information into
1826 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1828 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1829 continuous, they can cover the whole VECTOR perfectly without any holes.
1830 Obtain one VECTOR list which contain candidates to be transformed.
1832 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1833 candidates with same mode, build the addition statements for them and
1834 generate BIT_FIELD_REFs accordingly.
1836 TODO:
1837 The current implementation requires the whole VECTORs should be fully
1838 covered, but it can be extended to support partial, checking adjacent
1839 but not fill the whole, it may need some cost model to define the
1840 boundary to do or not.
1842 static bool
1843 undistribute_bitref_for_vector (enum tree_code opcode,
1844 vec<operand_entry *> *ops, struct loop *loop)
1846 if (ops->length () <= 1)
1847 return false;
1849 if (opcode != PLUS_EXPR
1850 && opcode != MULT_EXPR
1851 && opcode != BIT_XOR_EXPR
1852 && opcode != BIT_IOR_EXPR
1853 && opcode != BIT_AND_EXPR)
1854 return false;
1856 hash_map<tree, v_info_ptr> v_info_map;
1857 operand_entry *oe1;
1858 unsigned i;
1860 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1861 information into map. */
1862 FOR_EACH_VEC_ELT (*ops, i, oe1)
1864 enum tree_code dcode;
1865 gimple *oe1def;
1867 if (TREE_CODE (oe1->op) != SSA_NAME)
1868 continue;
1869 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1870 if (!is_gimple_assign (oe1def))
1871 continue;
1872 dcode = gimple_assign_rhs_code (oe1def);
1873 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1874 continue;
1876 tree rhs = gimple_assign_rhs1 (oe1def);
1877 tree vec = TREE_OPERAND (rhs, 0);
1878 tree vec_type = TREE_TYPE (vec);
1880 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1881 continue;
1883 /* Ignore it if target machine can't support this VECTOR type. */
1884 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1885 continue;
1887 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1888 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1889 continue;
1891 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1892 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1893 continue;
1895 /* The type of BIT_FIELD_REF might not be equal to the element type of
1896 the vector. We want to use a vector type with element type the
1897 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1898 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1900 machine_mode simd_mode;
1901 unsigned HOST_WIDE_INT size, nunits;
1902 unsigned HOST_WIDE_INT elem_size
1903 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1904 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1905 continue;
1906 if (size <= elem_size || (size % elem_size) != 0)
1907 continue;
1908 nunits = size / elem_size;
1909 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1910 nunits).exists (&simd_mode))
1911 continue;
1912 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1914 /* Ignore it if target machine can't support this VECTOR type. */
1915 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1916 continue;
1918 /* Check const vector type, constrain BIT_FIELD_REF offset and
1919 size. */
1920 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1921 continue;
1923 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1924 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1925 continue;
1928 tree elem_type = TREE_TYPE (vec_type);
1929 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1930 if (maybe_ne (bit_field_size (rhs), elem_size))
1931 continue;
1933 unsigned idx;
1934 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1935 continue;
1937 /* Ignore it if target machine can't support this type of VECTOR
1938 operation. */
1939 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1940 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1941 continue;
1943 bool existed;
1944 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1945 if (!existed)
1947 info = new v_info;
1948 info->vec_type = vec_type;
1950 else if (!types_compatible_p (vec_type, info->vec_type))
1951 continue;
1952 info->vec.safe_push (std::make_pair (idx, i));
1955 /* At least two VECTOR to combine. */
1956 if (v_info_map.elements () <= 1)
1958 cleanup_vinfo_map (v_info_map);
1959 return false;
1962 /* Verify all VECTOR candidates by checking two conditions:
1963 1) sorted offsets are adjacent, no holes.
1964 2) can fill the whole VECTOR perfectly.
1965 And add the valid candidates to a vector for further handling. */
1966 auto_vec<tree> valid_vecs (v_info_map.elements ());
1967 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1968 it != v_info_map.end (); ++it)
1970 tree cand_vec = (*it).first;
1971 v_info_ptr cand_info = (*it).second;
1972 unsigned int num_elems
1973 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1974 if (cand_info->vec.length () != num_elems)
1975 continue;
1976 sbitmap holes = sbitmap_alloc (num_elems);
1977 bitmap_ones (holes);
1978 bool valid = true;
1979 v_info_elem *curr;
1980 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1982 if (!bitmap_bit_p (holes, curr->first))
1984 valid = false;
1985 break;
1987 else
1988 bitmap_clear_bit (holes, curr->first);
1990 if (valid && bitmap_empty_p (holes))
1991 valid_vecs.quick_push (cand_vec);
1992 sbitmap_free (holes);
1995 /* At least two VECTOR to combine. */
1996 if (valid_vecs.length () <= 1)
1998 cleanup_vinfo_map (v_info_map);
1999 return false;
2002 valid_vecs.qsort (sort_by_mach_mode);
2003 /* Go through all candidates by machine mode order, query the mode_to_total
2004 to get the total number for each mode and skip the single one. */
2005 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2007 tree tvec = valid_vecs[i];
2008 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2010 /* Skip modes with only a single candidate. */
2011 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2012 continue;
2014 unsigned int idx, j;
2015 gimple *sum = NULL;
2016 tree sum_vec = tvec;
2017 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2018 v_info_elem *elem;
2019 tree vec_type = info_ptr->vec_type;
2021 /* Build the sum for all candidates with same mode. */
2024 sum = build_and_add_sum (vec_type, sum_vec,
2025 valid_vecs[i + 1], opcode);
2026 if (!useless_type_conversion_p (vec_type,
2027 TREE_TYPE (valid_vecs[i + 1])))
2029 /* Update the operands only after build_and_add_sum,
2030 so that we don't have to repeat the placement algorithm
2031 of build_and_add_sum. */
2032 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2033 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2034 valid_vecs[i + 1]);
2035 tree lhs = make_ssa_name (vec_type);
2036 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2037 gimple_set_uid (g, gimple_uid (sum));
2038 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2039 gimple_assign_set_rhs2 (sum, lhs);
2040 if (sum_vec == tvec)
2042 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2043 lhs = make_ssa_name (vec_type);
2044 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2045 gimple_set_uid (g, gimple_uid (sum));
2046 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2047 gimple_assign_set_rhs1 (sum, lhs);
2049 update_stmt (sum);
2051 sum_vec = gimple_get_lhs (sum);
2052 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2053 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2054 /* Update those related ops of current candidate VECTOR. */
2055 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2057 idx = elem->second;
2058 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2059 /* Set this then op definition will get DCEd later. */
2060 gimple_set_visited (def, true);
2061 if (opcode == PLUS_EXPR
2062 || opcode == BIT_XOR_EXPR
2063 || opcode == BIT_IOR_EXPR)
2064 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2065 else if (opcode == MULT_EXPR)
2066 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2067 else
2069 gcc_assert (opcode == BIT_AND_EXPR);
2070 (*ops)[idx]->op
2071 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2073 (*ops)[idx]->rank = 0;
2075 if (dump_file && (dump_flags & TDF_DETAILS))
2077 fprintf (dump_file, "Generating addition -> ");
2078 print_gimple_stmt (dump_file, sum, 0);
2080 i++;
2082 while ((i < valid_vecs.length () - 1)
2083 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2085 /* Referring to first valid VECTOR with this mode, generate the
2086 BIT_FIELD_REF statements accordingly. */
2087 info_ptr = *(v_info_map.get (tvec));
2088 gcc_assert (sum);
2089 tree elem_type = TREE_TYPE (vec_type);
2090 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2092 idx = elem->second;
2093 tree dst = make_ssa_name (elem_type);
2094 tree pos = bitsize_int (elem->first
2095 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2096 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2097 TYPE_SIZE (elem_type), pos);
2098 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2099 insert_stmt_after (gs, sum);
2100 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2101 /* Set this then op definition will get DCEd later. */
2102 gimple_set_visited (def, true);
2103 (*ops)[idx]->op = gimple_assign_lhs (gs);
2104 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2105 if (dump_file && (dump_flags & TDF_DETAILS))
2107 fprintf (dump_file, "Generating bit_field_ref -> ");
2108 print_gimple_stmt (dump_file, gs, 0);
2113 if (dump_file && (dump_flags & TDF_DETAILS))
2114 fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2116 cleanup_vinfo_map (v_info_map);
2118 return true;
2121 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2122 expression, examine the other OPS to see if any of them are comparisons
2123 of the same values, which we may be able to combine or eliminate.
2124 For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
2126 static bool
2127 eliminate_redundant_comparison (enum tree_code opcode,
2128 vec<operand_entry *> *ops,
2129 unsigned int currindex,
2130 operand_entry *curr)
2132 tree op1, op2;
2133 enum tree_code lcode, rcode;
2134 gimple *def1, *def2;
2135 int i;
2136 operand_entry *oe;
2138 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2139 return false;
2141 /* Check that CURR is a comparison.  */
2142 if (TREE_CODE (curr->op) != SSA_NAME)
2143 return false;
2144 def1 = SSA_NAME_DEF_STMT (curr->op);
2145 if (!is_gimple_assign (def1))
2146 return false;
2147 lcode = gimple_assign_rhs_code (def1);
2148 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2149 return false;
2150 op1 = gimple_assign_rhs1 (def1);
2151 op2 = gimple_assign_rhs2 (def1);
2153 /* Now look for a similar comparison in the remaining OPS.  */
2154 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2156 tree t;
2158 if (TREE_CODE (oe->op) != SSA_NAME)
2159 continue;
2160 def2 = SSA_NAME_DEF_STMT (oe->op);
2161 if (!is_gimple_assign (def2))
2162 continue;
2163 rcode = gimple_assign_rhs_code (def2);
2164 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2165 continue;
2167 /* If we got here, we have a match.  See if we can combine the
2168 two comparisons.  */
2169 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2170 if (opcode == BIT_IOR_EXPR)
2171 t = maybe_fold_or_comparisons (type,
2172 lcode, op1, op2,
2173 rcode, gimple_assign_rhs1 (def2),
2174 gimple_assign_rhs2 (def2));
2175 else
2176 t = maybe_fold_and_comparisons (type,
2177 lcode, op1, op2,
2178 rcode, gimple_assign_rhs1 (def2),
2179 gimple_assign_rhs2 (def2));
2180 if (!t)
2181 continue;
2183 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2184 always give us a boolean_type_node value back.  If the original
2185 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2186 we need to convert.  */
2187 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2188 t = fold_convert (TREE_TYPE (curr->op), t);
  /* Bail out on combined results we could not re-materialize as a
     single comparison with gimple-value operands.  */
2190 if (TREE_CODE (t) != INTEGER_CST
2191 && !operand_equal_p (t, curr->op, 0))
2193 enum tree_code subcode;
2194 tree newop1, newop2;
2195 if (!COMPARISON_CLASS_P (t))
2196 continue;
2197 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2198 STRIP_USELESS_TYPE_CONVERSION (newop1);
2199 STRIP_USELESS_TYPE_CONVERSION (newop2);
2200 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2201 continue;
2204 if (dump_file && (dump_flags & TDF_DETAILS))
2206 fprintf (dump_file, "Equivalence: ");
2207 print_generic_expr (dump_file, curr->op);
2208 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2209 print_generic_expr (dump_file, oe->op);
2210 fprintf (dump_file, " -> ");
2211 print_generic_expr (dump_file, t);
2212 fprintf (dump_file, "\n");
2215 /* Now we can delete oe, as it has been subsumed by the new combined
2216 expression t.  */
2217 ops->ordered_remove (i);
2218 reassociate_stats.ops_eliminated ++;
2220 /* If t is the same as curr->op, we're done.  Otherwise we must
2221 replace curr->op with t.  Special case is if we got a constant
2222 back, in which case we add it to the end instead of in place of
2223 the current entry.  */
2224 if (TREE_CODE (t) == INTEGER_CST)
2226 ops->ordered_remove (currindex);
2227 add_to_ops_vec (ops, t);
2229 else if (!operand_equal_p (t, curr->op, 0))
  /* Materialize the combined comparison as a new statement and let
     CURR refer to its result.  */
2231 gimple *sum;
2232 enum tree_code subcode;
2233 tree newop1;
2234 tree newop2;
2235 gcc_assert (COMPARISON_CLASS_P (t));
2236 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2237 STRIP_USELESS_TYPE_CONVERSION (newop1);
2238 STRIP_USELESS_TYPE_CONVERSION (newop2);
2239 gcc_checking_assert (is_gimple_val (newop1)
2240 && is_gimple_val (newop2));
2241 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2242 curr->op = gimple_get_lhs (sum);
2244 return true;
2247 return false;
2251 /* Transform repeated addition of same values into multiply with
2252 constant. */
     /* Integer types only, or floats when -funsafe-math-optimizations
	allows it (x + x + x -> 3*x can change rounding/exceptions).
	Assumes equal operands appear adjacent in OPS -- presumably
	guaranteed by the operand sort; confirm at the caller.
	Returns true if OPS was changed.  */
2253 static bool
2254 transform_add_to_multiply (vec<operand_entry *> *ops)
2256 operand_entry *oe;
2257 tree op = NULL_TREE;
2258 int j;
2259 int i, start = -1, end = 0, count = 0;
     /* Each pair records the [start, end] index range of one run of
	equal operands found in the scan below.  */
2260 auto_vec<std::pair <int, int> > indxs;
2261 bool changed = false;
2263 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2264 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2265 || !flag_unsafe_math_optimizations))
2266 return false;
2268 /* Look for repeated operands. */
2269 FOR_EACH_VEC_ELT (*ops, i, oe)
2271 if (start == -1)
2273 count = 1;
2274 op = oe->op;
2275 start = i;
2277 else if (operand_equal_p (oe->op, op, 0))
2279 count++;
2280 end = i;
2282 else
2284 if (count > 1)
2285 indxs.safe_push (std::make_pair (start, end));
2286 count = 1;
2287 op = oe->op;
2288 start = i;
     /* Don't lose a run that extends to the very last operand.  */
2292 if (count > 1)
2293 indxs.safe_push (std::make_pair (start, end));
     /* Process runs back to front so the indices of earlier runs stay
	valid while elements are being removed from OPS.  */
2295 for (j = indxs.length () - 1; j >= 0; --j)
2297 /* Convert repeated operand addition to multiplication. */
2298 start = indxs[j].first;
2299 end = indxs[j].second;
2300 op = (*ops)[start]->op;
2301 count = end - start + 1;
2302 for (i = end; i >= start; --i)
2303 ops->unordered_remove (i);
2304 tree tmp = make_ssa_name (TREE_TYPE (op));
2305 tree cst = build_int_cst (integer_type_node, count);
2306 gassign *mul_stmt
2307 = gimple_build_assign (tmp, MULT_EXPR,
2308 op, fold_convert (TREE_TYPE (op), cst))
     /* Mark visited so reassociate_bb does not reprocess the new
	multiply statement.  */
2309 gimple_set_visited (mul_stmt, true);
2310 add_to_ops_vec (ops, tmp, mul_stmt);
2311 changed = true;
2314 return changed;
2318 /* Perform various identities and other optimizations on the list of
2319 operand entries, stored in OPS. The tree code for the binary
2320 operation between all the operands is OPCODE. */
     /* May shrink OPS in place; recurses on itself after any
	simplification so newly-adjacent operands get reconsidered.  */
2322 static void
2323 optimize_ops_list (enum tree_code opcode,
2324 vec<operand_entry *> *ops)
2326 unsigned int length = ops->length ();
2327 unsigned int i;
2328 operand_entry *oe;
2329 operand_entry *oelast = NULL;
2330 bool iterate = false;
2332 if (length == 1)
2333 return;
2335 oelast = ops->last ();
2337 /* If the last two are constants, pop the constants off, merge them
2338 and try the next two. */
     /* rank == 0 marks constants; both must have compatible types for
	fold_binary to combine them.  */
2339 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2341 operand_entry *oelm1 = (*ops)[length - 2];
2343 if (oelm1->rank == 0
2344 && is_gimple_min_invariant (oelm1->op)
2345 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2346 TREE_TYPE (oelast->op)))
2348 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2349 oelm1->op, oelast->op);
2351 if (folded && is_gimple_min_invariant (folded))
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2354 fprintf (dump_file, "Merging constants\n");
2356 ops->pop ();
2357 ops->pop ();
2359 add_to_ops_vec (ops, folded);
2360 reassociate_stats.constants_eliminated++;
     /* The merged constant may combine with yet another trailing
	constant; restart from the top.  */
2362 optimize_ops_list (opcode, ops);
2363 return;
2368 eliminate_using_constants (opcode, ops);
2369 oelast = NULL;
     /* Pairwise elimination pass.  I is only advanced when nothing was
	eliminated at this position; OELAST tracks the previous
	surviving operand for duplicate-pair detection.  */
2371 for (i = 0; ops->iterate (i, &oe);)
2373 bool done = false;
2375 if (eliminate_not_pairs (opcode, ops, i, oe))
2376 return;
2377 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2378 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2379 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2381 if (done)
2382 return;
2383 iterate = true;
2384 oelast = NULL;
2385 continue;
2387 oelast = oe;
2388 i++;
     /* Something changed; rescan the whole list for newly-exposed
	opportunities.  */
2391 if (iterate)
2392 optimize_ops_list (opcode, ops);
2395 /* The following functions are subroutines to optimize_range_tests and allow
2396 it to try to change a logical combination of comparisons into a range
2397 test.
2399 For example, both
2400 X == 2 || X == 5 || X == 3 || X == 4
2401 and
2402 X >= 2 && X <= 5
2403 are converted to
2404 (unsigned) (X - 2) <= 3
2406 For more information see comments above fold_range_test in fold-const.c,
2407 this implementation is for GIMPLE. */
2409 struct range_entry
2411 tree exp;
2412 tree low;
2413 tree high;
2414 bool in_p;
2415 bool strict_overflow_p;
2416 unsigned int idx, next;
2419 void dump_range_entry (FILE *file, struct range_entry *r);
2420 void debug_range_entry (struct range_entry *r);
2422 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2424 void
2425 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2427 if (!skip_exp)
2428 print_generic_expr (file, r->exp);
2429 fprintf (file, " %c[", r->in_p ? '+' : '-');
2430 print_generic_expr (file, r->low);
2431 fputs (", ", file);
2432 print_generic_expr (file, r->high);
2433 fputc (']', file);
2436 /* Dump the range entry R to STDERR. */
2438 DEBUG_FUNCTION void
2439 debug_range_entry (struct range_entry *r)
2441 dump_range_entry (stderr, r, false);
2442 fputc ('\n', stderr);
2445 /* This is similar to make_range in fold-const.c, but on top of
2446 GIMPLE instead of trees. If EXP is non-NULL, it should be
2447 an SSA_NAME and STMT argument is ignored, otherwise STMT
2448 argument should be a GIMPLE_COND. */
     /* On return R->exp is NULL_TREE if no usable range was found.  */
2450 static void
2451 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2453 int in_p;
2454 tree low, high;
2455 bool is_bool, strict_overflow_p;
2457 r->exp = NULL_TREE;
2458 r->in_p = false;
2459 r->strict_overflow_p = false;
2460 r->low = NULL_TREE;
2461 r->high = NULL_TREE;
2462 if (exp != NULL_TREE
2463 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2464 return;
2466 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2467 and see if we can refine the range. Some of the cases below may not
2468 happen, but it doesn't seem worth worrying about this. We "continue"
2469 the outer loop when we've changed something; otherwise we "break"
2470 the switch, which will "break" the while. */
2471 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2472 high = low;
2473 in_p = 0;
2474 strict_overflow_p = false;
     /* IS_BOOL tracks whether EXP is known to be boolean-like (0/1
	valued); only then is the final range recorded.  */
2475 is_bool = false;
2476 if (exp == NULL_TREE)
2477 is_bool = true;
2478 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2480 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2481 is_bool = true;
2482 else
     /* Signed 1-bit precision: values are 0/-1, not usable here.  */
2483 return;
2485 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2486 is_bool = true;
     /* Walk backwards through the defining statements, refining
	[LOW, HIGH] one step at a time via make_range_step.  */
2488 while (1)
2490 enum tree_code code;
2491 tree arg0, arg1, exp_type;
2492 tree nexp;
2493 location_t loc;
2495 if (exp != NULL_TREE)
2497 if (TREE_CODE (exp) != SSA_NAME
2498 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2499 break;
2501 stmt = SSA_NAME_DEF_STMT (exp);
2502 if (!is_gimple_assign (stmt))
2503 break;
2505 code = gimple_assign_rhs_code (stmt);
2506 arg0 = gimple_assign_rhs1 (stmt);
2507 arg1 = gimple_assign_rhs2 (stmt);
2508 exp_type = TREE_TYPE (exp);
2510 else
2512 code = gimple_cond_code (stmt);
2513 arg0 = gimple_cond_lhs (stmt);
2514 arg1 = gimple_cond_rhs (stmt);
2515 exp_type = boolean_type_node;
2518 if (TREE_CODE (arg0) != SSA_NAME
2519 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2520 break;
2521 loc = gimple_location (stmt);
2522 switch (code)
2524 case BIT_NOT_EXPR:
2525 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2526 /* Ensure the range is either +[-,0], +[0,0],
2527 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2528 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2529 or similar expression of unconditional true or
2530 false, it should not be negated. */
2531 && ((high && integer_zerop (high))
2532 || (low && integer_onep (low))))
2534 in_p = !in_p;
2535 exp = arg0;
2536 continue;
2538 break;
2539 case SSA_NAME:
2540 exp = arg0;
2541 continue;
2542 CASE_CONVERT:
2543 if (is_bool)
     /* A widening conversion from a boolean-like value would
	make the recorded range apply to the wrong type.  */
2545 if ((TYPE_PRECISION (exp_type) == 1
2546 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2547 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2548 return;
2550 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2552 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2553 is_bool = true;
2554 else
2555 return;
2557 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2558 is_bool = true;
2559 goto do_default;
2560 case EQ_EXPR:
2561 case NE_EXPR:
2562 case LT_EXPR:
2563 case LE_EXPR:
2564 case GE_EXPR:
2565 case GT_EXPR:
2566 is_bool = true;
2567 /* FALLTHRU */
2568 default:
2569 if (!is_bool)
2570 return;
2571 do_default:
2572 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2573 &low, &high, &in_p,
2574 &strict_overflow_p);
2575 if (nexp != NULL_TREE)
2577 exp = nexp;
2578 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2579 continue;
2581 break;
2583 break;
     /* Commit the accumulated range only for boolean-like EXP.  */
2585 if (is_bool)
2587 r->exp = exp;
2588 r->in_p = in_p;
2589 r->low = low;
2590 r->high = high;
2591 r->strict_overflow_p = strict_overflow_p;
2595 /* Comparison function for qsort. Sort entries
2596 without SSA_NAME exp first, then with SSA_NAMEs sorted
2597 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2598 by increasing ->low and if ->low is the same, by increasing
2599 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2600 maximum. */
     /* The final idx comparison makes the sort stable, since qsort
	itself is not.  */
2602 static int
2603 range_entry_cmp (const void *a, const void *b)
2605 const struct range_entry *p = (const struct range_entry *) a;
2606 const struct range_entry *q = (const struct range_entry *) b;
2608 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2610 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2612 /* Group range_entries for the same SSA_NAME together. */
2613 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2614 return -1;
2615 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2616 return 1;
2617 /* If ->low is different, NULL low goes first, then by
2618 ascending low. */
2619 if (p->low != NULL_TREE)
2621 if (q->low != NULL_TREE)
     /* fold_binary may fail to fold the comparison (returning
	NULL); in that case fall through to the next key.  */
2623 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2624 p->low, q->low);
2625 if (tem && integer_onep (tem))
2626 return -1;
2627 tem = fold_binary (GT_EXPR, boolean_type_node,
2628 p->low, q->low);
2629 if (tem && integer_onep (tem))
2630 return 1;
2632 else
2633 return 1;
2635 else if (q->low != NULL_TREE)
2636 return -1;
2637 /* If ->high is different, NULL high goes last, before that by
2638 ascending high. */
2639 if (p->high != NULL_TREE)
2641 if (q->high != NULL_TREE)
2643 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2644 p->high, q->high);
2645 if (tem && integer_onep (tem))
2646 return -1;
2647 tem = fold_binary (GT_EXPR, boolean_type_node,
2648 p->high, q->high);
2649 if (tem && integer_onep (tem))
2650 return 1;
2652 else
2653 return -1;
2655 else if (q->high != NULL_TREE)
2656 return 1;
2657 /* If both ranges are the same, sort below by ascending idx. */
2659 else
2660 return 1;
2662 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2663 return -1;
2665 if (p->idx < q->idx)
2666 return -1;
2667 else
     /* Entries are distinct, so equal idx would be a caller bug.  */
2669 gcc_checking_assert (p->idx > q->idx);
2670 return 1;
2674 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2675 insert needed statements BEFORE or after GSI. */
2677 static tree
2678 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2680 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2681 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2682 if (TREE_CODE (ret) != SSA_NAME)
2684 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2685 if (before)
2686 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2687 else
2688 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2689 ret = gimple_assign_lhs (g);
2691 return ret;
2694 /* Helper routine of optimize_range_test.
2695 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2696 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2697 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2698 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2699 an array of COUNT pointers to other ranges. Return
2700 true if the range merge has been successful.
2701 If OPCODE is ERROR_MARK, this is called from within
2702 maybe_optimize_range_tests and is performing inter-bb range optimization.
2703 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2704 oe->rank. */
2706 static bool
2707 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2708 struct range_entry **otherrangep,
2709 unsigned int count, enum tree_code opcode,
2710 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2711 bool in_p, tree low, tree high, bool strict_overflow_p)
2713 operand_entry *oe = (*ops)[range->idx];
2714 tree op = oe->op;
     /* A NULL op means the entry stands for a GIMPLE_COND at the end
	of basic block oe->id rather than an SSA_NAME.  */
2715 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2716 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2717 location_t loc = gimple_location (stmt);
2718 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2719 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2720 in_p, low, high);
2721 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2722 gimple_stmt_iterator gsi;
2723 unsigned int i, uid;
2725 if (tem == NULL_TREE)
2726 return false;
2728 /* If op is default def SSA_NAME, there is no place to insert the
2729 new comparison. Give up, unless we can use OP itself as the
2730 range test. */
2731 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2733 if (op == range->exp
2734 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2735 || TREE_CODE (optype) == BOOLEAN_TYPE)
2736 && (op == tem
2737 || (TREE_CODE (tem) == EQ_EXPR
2738 && TREE_OPERAND (tem, 0) == op
2739 && integer_onep (TREE_OPERAND (tem, 1))))
2740 && opcode != BIT_IOR_EXPR
2741 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2743 stmt = NULL;
2744 tem = op;
2746 else
2747 return false;
2750 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2751 warning_at (loc, OPT_Wstrict_overflow,
2752 "assuming signed overflow does not occur "
2753 "when simplifying range test");
2755 if (dump_file && (dump_flags & TDF_DETAILS))
2757 struct range_entry *r;
2758 fprintf (dump_file, "Optimizing range tests ");
2759 dump_range_entry (dump_file, range, false);
2760 for (i = 0; i < count; i++)
2762 if (otherrange)
2763 r = otherrange + i;
2764 else
2765 r = otherrangep[i];
2766 if (r->exp
2767 && r->exp != range->exp
2768 && TREE_CODE (r->exp) == SSA_NAME)
2770 fprintf (dump_file, " and ");
2771 dump_range_entry (dump_file, r, false);
2773 else
2775 fprintf (dump_file, " and");
2776 dump_range_entry (dump_file, r, true);
2779 fprintf (dump_file, "\n into ");
2780 print_generic_expr (dump_file, tem);
2781 fprintf (dump_file, "\n");
     /* For an IOR of ranges the combined check was built negated
	(A || B == !(!A && !B)); invert it back here.  */
2784 if (opcode == BIT_IOR_EXPR
2785 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2786 tem = invert_truthvalue_loc (loc, tem);
2788 tem = fold_convert_loc (loc, optype, tem);
2789 if (stmt)
2791 gsi = gsi_for_stmt (stmt);
2792 uid = gimple_uid (stmt);
2794 else
2796 gsi = gsi_none ();
2797 uid = 0;
2799 if (stmt == NULL)
2800 gcc_checking_assert (tem == op);
2801 /* In rare cases range->exp can be equal to lhs of stmt.
2802 In that case we have to insert after the stmt rather then before
2803 it. If stmt is a PHI, insert it at the start of the basic block. */
2804 else if (op != range->exp)
2806 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2807 tem = force_into_ssa_name (&gsi, tem, true);
2808 gsi_prev (&gsi);
2810 else if (gimple_code (stmt) != GIMPLE_PHI)
2812 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2813 tem = force_into_ssa_name (&gsi, tem, false);
2815 else
     /* PHI case: insert after the labels at the head of the block,
	choosing a uid for the new statements (see loop below).  */
2817 gsi = gsi_after_labels (gimple_bb (stmt));
2818 if (!gsi_end_p (gsi))
2819 uid = gimple_uid (gsi_stmt (gsi));
2820 else
2822 gsi = gsi_start_bb (gimple_bb (stmt));
2823 uid = 1;
2824 while (!gsi_end_p (gsi))
2826 uid = gimple_uid (gsi_stmt (gsi));
2827 gsi_next (&gsi);
2830 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2831 tem = force_into_ssa_name (&gsi, tem, true);
2832 if (gsi_end_p (gsi))
2833 gsi = gsi_last_bb (gimple_bb (stmt));
2834 else
2835 gsi_prev (&gsi);
     /* Walk backwards assigning UID to the freshly inserted
	statements (those still have uid 0).  */
2837 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2838 if (gimple_uid (gsi_stmt (gsi)))
2839 break;
2840 else
2841 gimple_set_uid (gsi_stmt (gsi), uid);
2843 oe->op = tem;
2844 range->exp = exp;
2845 range->low = low;
2846 range->high = high;
2847 range->in_p = in_p;
2848 range->strict_overflow_p = false;
2850 for (i = 0; i < count; i++)
2852 if (otherrange)
2853 range = otherrange + i;
2854 else
2855 range = otherrangep[i];
2856 oe = (*ops)[range->idx];
2857 /* Now change all the other range test immediate uses, so that
2858 those tests will be optimized away. */
     /* Replace the subsumed tests with the neutral element of their
	operation (0 for IOR, 1/true for AND).  */
2859 if (opcode == ERROR_MARK)
2861 if (oe->op)
2862 oe->op = build_int_cst (TREE_TYPE (oe->op),
2863 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2864 else
2865 oe->op = (oe->rank == BIT_IOR_EXPR
2866 ? boolean_false_node : boolean_true_node);
2868 else
2869 oe->op = error_mark_node;
2870 range->exp = NULL_TREE;
2871 range->low = NULL_TREE;
2872 range->high = NULL_TREE;
2874 return true;
2877 /* Optimize X == CST1 || X == CST2
2878 if popcount (CST1 ^ CST2) == 1 into
2879 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2880 Similarly for ranges. E.g.
2881 X != 2 && X != 3 && X != 10 && X != 11
2882 will be transformed by the previous optimization into
2883 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2884 and this loop can transform that into
2885 !(((X & ~8) - 2U) <= 1U). */
     /* Returns true and merges RANGEI/RANGEJ via update_range_test on
	success.  */
2887 static bool
2888 optimize_range_tests_xor (enum tree_code opcode, tree type,
2889 tree lowi, tree lowj, tree highi, tree highj,
2890 vec<operand_entry *> *ops,
2891 struct range_entry *rangei,
2892 struct range_entry *rangej)
2894 tree lowxor, highxor, tem, exp;
2895 /* Check lowi ^ lowj == highi ^ highj and
2896 popcount (lowi ^ lowj) == 1. */
2897 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2898 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2899 return false;
2900 if (!integer_pow2p (lowxor))
2901 return false;
2902 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2903 if (!tree_int_cst_equal (lowxor, highxor))
2904 return false;
2906 exp = rangei->exp;
     /* If TYPE doesn't span its whole mode's precision (e.g. a bit
	field or enum type), switch to a full-precision integer type so
	the masking arithmetic is well defined.  */
2907 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2908 int prec = GET_MODE_PRECISION (mode);
2909 if (TYPE_PRECISION (type) < prec
2910 || (wi::to_wide (TYPE_MIN_VALUE (type))
2911 != wi::min_value (prec, TYPE_SIGN (type)))
2912 || (wi::to_wide (TYPE_MAX_VALUE (type))
2913 != wi::max_value (prec, TYPE_SIGN (type))))
2915 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2916 exp = fold_convert (type, exp);
2917 lowxor = fold_convert (type, lowxor);
2918 lowi = fold_convert (type, lowi);
2919 highi = fold_convert (type, highi);
     /* Mask off the single differing bit from the expression and both
	bounds, then let update_range_test install the merged test.  */
2921 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2922 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2923 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2924 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2925 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2926 NULL, rangei->in_p, lowj, highj,
2927 rangei->strict_overflow_p
2928 || rangej->strict_overflow_p))
2929 return true;
2930 return false;
2933 /* Optimize X == CST1 || X == CST2
2934 if popcount (CST2 - CST1) == 1 into
2935 ((X - CST1) & ~(CST2 - CST1)) == 0.
2936 Similarly for ranges. E.g.
2937 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2938 || X == 75 || X == 45
2939 will be transformed by the previous optimization into
2940 (X - 43U) <= 3U || (X - 75U) <= 3U
2941 and this loop can transform that into
2942 ((X - 43U) & ~(75U - 43U)) <= 3U. */
     /* Returns true and merges RANGEI/RANGEJ via update_range_test on
	success.  */
2943 static bool
2944 optimize_range_tests_diff (enum tree_code opcode, tree type,
2945 tree lowi, tree lowj, tree highi, tree highj,
2946 vec<operand_entry *> *ops,
2947 struct range_entry *rangei,
2948 struct range_entry *rangej)
2950 tree tem1, tem2, mask;
2951 /* Check highi - lowi == highj - lowj. */
2952 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2953 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2954 return false;
2955 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2956 if (!tree_int_cst_equal (tem1, tem2))
2957 return false;
2958 /* Check popcount (lowj - lowi) == 1. */
2959 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2960 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2961 return false;
2962 if (!integer_pow2p (tem1))
2963 return false;
     /* Do the subtract-and-mask arithmetic in an unsigned type of the
	mode's full precision so overflow is well defined.  */
2965 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2966 int prec = GET_MODE_PRECISION (mode);
2967 if (TYPE_PRECISION (type) < prec
2968 || (wi::to_wide (TYPE_MIN_VALUE (type))
2969 != wi::min_value (prec, TYPE_SIGN (type)))
2970 || (wi::to_wide (TYPE_MAX_VALUE (type))
2971 != wi::max_value (prec, TYPE_SIGN (type))))
2972 type = build_nonstandard_integer_type (prec, 1);
2973 else
2974 type = unsigned_type_for (type);
2975 tem1 = fold_convert (type, tem1);
2976 tem2 = fold_convert (type, tem2);
2977 lowi = fold_convert (type, lowi);
2978 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
     /* Build ((exp - lowi) & mask), to be tested against [0, tem2].  */
2979 tem1 = fold_build2 (MINUS_EXPR, type,
2980 fold_convert (type, rangei->exp), lowi);
2981 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2982 lowj = build_int_cst (type, 0);
2983 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2984 NULL, rangei->in_p, lowj, tem2,
2985 rangei->strict_overflow_p
2986 || rangej->strict_overflow_p))
2987 return true;
2988 return false;
2991 /* It does some common checks for function optimize_range_tests_xor and
2992 optimize_range_tests_diff.
2993 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2994 Else it calls optimize_range_tests_diff. */
     /* Returns true if any pair of ranges in RANGES[FIRST..LENGTH-1]
	was merged.  */
2996 static bool
2997 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2998 bool optimize_xor, vec<operand_entry *> *ops,
2999 struct range_entry *ranges)
3001 int i, j;
3002 bool any_changes = false;
3003 for (i = first; i < length; i++)
3005 tree lowi, highi, lowj, highj, type, tem;
3007 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3008 continue;
3009 type = TREE_TYPE (ranges[i].exp);
3010 if (!INTEGRAL_TYPE_P (type))
3011 continue;
3012 lowi = ranges[i].low;
3013 if (lowi == NULL_TREE)
3014 lowi = TYPE_MIN_VALUE (type);
3015 highi = ranges[i].high;
3016 if (highi == NULL_TREE)
3017 continue;
     /* Cap the pairing search at 64 candidates to keep the pass
	linear-ish on pathological inputs.  */
3018 for (j = i + 1; j < length && j < i + 64; j++)
3020 bool changes;
3021 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3022 continue;
3023 lowj = ranges[j].low;
3024 if (lowj == NULL_TREE)
3025 continue;
3026 highj = ranges[j].high;
3027 if (highj == NULL_TREE)
3028 highj = TYPE_MAX_VALUE (type);
3029 /* Check lowj > highi. */
3030 tem = fold_binary (GT_EXPR, boolean_type_node,
3031 lowj, highi);
3032 if (tem == NULL_TREE || !integer_onep (tem))
3033 continue;
3034 if (optimize_xor)
3035 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3036 highi, highj, ops,
3037 ranges + i, ranges + j);
3038 else
3039 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3040 highi, highj, ops,
3041 ranges + i, ranges + j);
3042 if (changes)
3044 any_changes = true;
3045 break;
3049 return any_changes;
3052 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3053 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3054 EXP on success, NULL otherwise. */
     /* PREC is the word precision limiting the mask width.  TOTALLOW is
	the base value the whole group of ranges is rebased against;
	when TOTALLOWP is non-NULL this is the first range of a group
	and the chosen base is stored through it.  */
3056 static tree
3057 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3058 wide_int *mask, tree *totallowp)
     /* The range must be non-empty and narrower than a word.  */
3060 tree tem = int_const_binop (MINUS_EXPR, high, low);
3061 if (tem == NULL_TREE
3062 || TREE_CODE (tem) != INTEGER_CST
3063 || TREE_OVERFLOW (tem)
3064 || tree_int_cst_sgn (tem) == -1
3065 || compare_tree_int (tem, prec) != -1)
3066 return NULL_TREE;
3068 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3069 *mask = wi::shifted_mask (0, max, false, prec);
     /* (X & CST) with a single zero bit in CST within range widens the
	mask: the test also covers the values with that bit set.  */
3070 if (TREE_CODE (exp) == BIT_AND_EXPR
3071 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3073 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3074 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3075 if (wi::popcount (msk) == 1
3076 && wi::ltu_p (msk, prec - max))
3078 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3079 max += msk.to_uhwi ();
3080 exp = TREE_OPERAND (exp, 0);
     /* Similarly look through (Y + CST), folding CST into the base
	(bias) instead of into the expression.  */
3081 if (integer_zerop (low)
3082 && TREE_CODE (exp) == PLUS_EXPR
3083 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3085 tree ret = TREE_OPERAND (exp, 0);
3086 STRIP_NOPS (ret);
3087 widest_int bias
3088 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3089 TYPE_PRECISION (TREE_TYPE (low))));
3090 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3091 if (totallowp)
3093 *totallowp = tbias;
3094 return ret;
3096 else if (!tree_int_cst_lt (totallow, tbias))
3097 return NULL_TREE;
3098 bias = wi::to_widest (tbias);
3099 bias -= wi::to_widest (totallow);
3100 if (bias >= 0 && bias < prec - max)
3102 *mask = wi::lshift (*mask, bias);
3103 return ret;
3108 if (totallowp)
3109 return exp;
     /* Rebase the mask onto TOTALLOW; give up if LOW is below it or
	the shifted mask would not fit in a word.  */
3110 if (!tree_int_cst_lt (totallow, low))
3111 return exp;
3112 tem = int_const_binop (MINUS_EXPR, low, totallow);
3113 if (tem == NULL_TREE
3114 || TREE_CODE (tem) != INTEGER_CST
3115 || TREE_OVERFLOW (tem)
3116 || compare_tree_int (tem, prec - max) == 1
3117 return NULL_TREE;
3119 *mask = wi::lshift (*mask, wi::to_widest (tem));
3120 return exp;
3123 /* Attempt to optimize small range tests using bit test.
3124 E.g.
3125 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3126 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3127 has been by earlier optimizations optimized into:
3128 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3129 As all the 43 through 82 range is less than 64 numbers,
3130 for 64-bit word targets optimize that into:
3131 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
     /* Returns true if any group of ranges was replaced by a bit
	test.  */
3133 static bool
3134 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3135 vec<operand_entry *> *ops,
3136 struct range_entry *ranges)
3138 int i, j;
3139 bool any_changes = false;
3140 int prec = GET_MODE_BITSIZE (word_mode);
3141 auto_vec<struct range_entry *, 64> candidates;
3143 for (i = first; i < length - 1; i++)
3145 tree lowi, highi, lowj, highj, type;
3147 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3148 continue;
3149 type = TREE_TYPE (ranges[i].exp);
3150 if (!INTEGRAL_TYPE_P (type))
3151 continue;
3152 lowi = ranges[i].low;
3153 if (lowi == NULL_TREE)
3154 lowi = TYPE_MIN_VALUE (type);
3155 highi = ranges[i].high;
3156 if (highi == NULL_TREE)
3157 continue;
3158 wide_int mask;
     /* Range I anchors the group; its low bound becomes the bias all
	other masks are shifted relative to.  */
3159 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3160 highi, &mask, &lowi);
3161 if (exp == NULL_TREE)
3162 continue;
3163 bool strict_overflow_p = ranges[i].strict_overflow_p;
3164 candidates.truncate (0);
3165 int end = MIN (i + 64, length);
     /* Collect later ranges over the same underlying expression and
	merge their bit masks.  */
3166 for (j = i + 1; j < end; j++)
3168 tree exp2;
3169 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3170 continue;
3171 if (ranges[j].exp == exp)
3173 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3175 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3176 if (exp2 == exp)
3178 else if (TREE_CODE (exp2) == PLUS_EXPR)
3180 exp2 = TREE_OPERAND (exp2, 0);
3181 STRIP_NOPS (exp2);
3182 if (exp2 != exp)
3183 continue;
3185 else
3186 continue;
3188 else
3189 continue;
3190 lowj = ranges[j].low;
3191 if (lowj == NULL_TREE)
3192 continue;
3193 highj = ranges[j].high;
3194 if (highj == NULL_TREE)
3195 highj = TYPE_MAX_VALUE (type);
3196 wide_int mask2;
3197 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3198 highj, &mask2, NULL);
3199 if (exp2 != exp)
3200 continue;
3201 mask |= mask2;
3202 strict_overflow_p |= ranges[j].strict_overflow_p;
3203 candidates.safe_push (&ranges[j]);
3206 /* If every possible relative value of the expression is a valid shift
3207 amount, then we can merge the entry test in the bit test. In this
3208 case, if we would need otherwise 2 or more comparisons, then use
3209 the bit test; in the other cases, the threshold is 3 comparisons. */
3210 wide_int min, max;
3211 bool entry_test_needed;
3212 if (TREE_CODE (exp) == SSA_NAME
3213 && get_range_info (exp, &min, &max) == VR_RANGE
3214 && wi::leu_p (max - min, prec - 1))
3216 wide_int ilowi = wi::to_wide (lowi)
     /* Realign the mask so shifts are counted from the value-range
	minimum rather than from lowi.  */
3217 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3219 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3220 mask = wi::lshift (mask, ilowi - min);
3222 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3224 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3225 mask = wi::lrshift (mask, min - ilowi);
3227 entry_test_needed = false;
3229 else
3230 entry_test_needed = true;
3231 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3233 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3234 wi::to_widest (lowi)
3235 + prec - 1 - wi::clz (mask));
3236 operand_entry *oe = (*ops)[ranges[i].idx];
3237 tree op = oe->op;
3238 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3239 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3240 location_t loc = gimple_location (stmt);
3241 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3243 /* See if it isn't cheaper to pretend the minimum value of the
3244 range is 0, if maximum value is small enough.
3245 We can avoid then subtraction of the minimum value, but the
3246 mask constant could be perhaps more expensive. */
3247 if (compare_tree_int (lowi, 0) > 0
3248 && compare_tree_int (high, prec) < 0)
3250 int cost_diff;
3251 HOST_WIDE_INT m = tree_to_uhwi (lowi);
     /* Use RTX costs on a scratch pseudo to compare "subtract then
	small mask" vs "no subtract, shifted mask".  */
3252 rtx reg = gen_raw_REG (word_mode, 10000);
3253 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3254 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3255 GEN_INT (-m)),
3256 word_mode, speed_p);
3257 rtx r = immed_wide_int_const (mask, word_mode);
3258 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3259 word_mode, speed_p);
3260 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3261 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3262 word_mode, speed_p);
3263 if (cost_diff > 0)
3265 mask = wi::lshift (mask, m);
3266 lowi = build_zero_cst (TREE_TYPE (lowi));
3270 tree tem;
3271 if (entry_test_needed)
3273 tem = build_range_check (loc, optype, unshare_expr (exp),
3274 false, lowi, high);
3275 if (tem == NULL_TREE || is_gimple_val (tem))
3276 continue;
3278 else
3279 tem = NULL_TREE;
     /* Build ((1 << (exp - lowi)) & mask) == 0 in the word type.  */
3280 tree etype = unsigned_type_for (TREE_TYPE (exp));
3281 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3282 fold_convert_loc (loc, etype, exp),
3283 fold_convert_loc (loc, etype, lowi));
3284 exp = fold_convert_loc (loc, integer_type_node, exp);
3285 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3286 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3287 build_int_cst (word_type, 1), exp);
3288 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3289 wide_int_to_tree (word_type, mask));
3290 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3291 build_zero_cst (word_type));
3292 if (is_gimple_val (exp))
3293 continue;
3295 /* The shift might have undefined behavior if TEM is true,
3296 but reassociate_bb isn't prepared to have basic blocks
3297 split when it is running. So, temporarily emit a code
3298 with BIT_IOR_EXPR instead of &&, and fix it up in
3299 branch_fixup. */
3300 gimple_seq seq = NULL;
3301 if (tem)
3303 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3304 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3305 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3307 gimple_seq seq2;
3308 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3309 gimple_seq_add_seq_without_update (&seq, seq2);
3310 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3311 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3312 if (tem)
3314 gimple *g = gimple_build_assign (make_ssa_name (optype),
3315 BIT_IOR_EXPR, tem, exp);
3316 gimple_set_location (g, loc);
3317 gimple_seq_add_stmt_without_update (&seq, g);
3318 exp = gimple_assign_lhs (g);
3320 tree val = build_zero_cst (optype);
3321 if (update_range_test (&ranges[i], NULL, candidates.address (),
3322 candidates.length (), opcode, ops, exp,
3323 seq, false, val, val, strict_overflow_p))
3325 any_changes = true;
3326 if (tem)
3327 reassoc_branch_fixups.safe_push (tem);
3329 else
3330 gimple_seq_discard (seq);
3333 return any_changes;
3336 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3337 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
3339 static bool
3340 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3341 vec<operand_entry *> *ops,
3342 struct range_entry *ranges)
3344 int i;
3345 unsigned int b;
3346 bool any_changes = false;
3347 auto_vec<int, 128> buckets;
3348 auto_vec<int, 32> chains;
3349 auto_vec<struct range_entry *, 32> candidates;
3351 for (i = first; i < length; i++)
3353 if (ranges[i].exp == NULL_TREE
3354 || TREE_CODE (ranges[i].exp) != SSA_NAME
3355 || !ranges[i].in_p
3356 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3357 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3358 || ranges[i].low == NULL_TREE
3359 || ranges[i].low != ranges[i].high)
3360 continue;
3362 bool zero_p = integer_zerop (ranges[i].low);
3363 if (!zero_p && !integer_all_onesp (ranges[i].low))
3364 continue;
3366 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3367 if (buckets.length () <= b)
3368 buckets.safe_grow_cleared (b + 1, true);
3369 if (chains.length () <= (unsigned) i)
3370 chains.safe_grow (i + 1, true);
3371 chains[i] = buckets[b];
3372 buckets[b] = i + 1;
3375 FOR_EACH_VEC_ELT (buckets, b, i)
3376 if (i && chains[i - 1])
3378 int j, k = i;
3379 for (j = chains[i - 1]; j; j = chains[j - 1])
3381 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3382 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3383 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3384 k = j;
3386 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3387 tree type2 = NULL_TREE;
3388 bool strict_overflow_p = false;
3389 candidates.truncate (0);
3390 for (j = i; j; j = chains[j - 1])
3392 tree type = TREE_TYPE (ranges[j - 1].exp);
3393 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3394 if (j == k
3395 || useless_type_conversion_p (type1, type))
3397 else if (type2 == NULL_TREE
3398 || useless_type_conversion_p (type2, type))
3400 if (type2 == NULL_TREE)
3401 type2 = type;
3402 candidates.safe_push (&ranges[j - 1]);
3405 unsigned l = candidates.length ();
3406 for (j = i; j; j = chains[j - 1])
3408 tree type = TREE_TYPE (ranges[j - 1].exp);
3409 if (j == k)
3410 continue;
3411 if (useless_type_conversion_p (type1, type))
3413 else if (type2 == NULL_TREE
3414 || useless_type_conversion_p (type2, type))
3415 continue;
3416 candidates.safe_push (&ranges[j - 1]);
3418 gimple_seq seq = NULL;
3419 tree op = NULL_TREE;
3420 unsigned int id;
3421 struct range_entry *r;
3422 candidates.safe_push (&ranges[k - 1]);
3423 FOR_EACH_VEC_ELT (candidates, id, r)
3425 gimple *g;
3426 if (id == 0)
3428 op = r->exp;
3429 continue;
3431 if (id == l)
3433 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3434 gimple_seq_add_stmt_without_update (&seq, g);
3435 op = gimple_assign_lhs (g);
3437 tree type = TREE_TYPE (r->exp);
3438 tree exp = r->exp;
3439 if (id >= l && !useless_type_conversion_p (type1, type))
3441 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3442 gimple_seq_add_stmt_without_update (&seq, g);
3443 exp = gimple_assign_lhs (g);
3445 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3446 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3447 op, exp);
3448 gimple_seq_add_stmt_without_update (&seq, g);
3449 op = gimple_assign_lhs (g);
3451 candidates.pop ();
3452 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3453 candidates.length (), opcode, ops, op,
3454 seq, true, ranges[k - 1].low,
3455 ranges[k - 1].low, strict_overflow_p))
3456 any_changes = true;
3457 else
3458 gimple_seq_discard (seq);
3461 return any_changes;
3464 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3465 a >= 0 && a < b into (unsigned) a < (unsigned) b
3466 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3468 static bool
3469 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3470 vec<operand_entry *> *ops,
3471 struct range_entry *ranges,
3472 basic_block first_bb)
3474 int i;
3475 bool any_changes = false;
3476 hash_map<tree, int> *map = NULL;
3478 for (i = first; i < length; i++)
3480 if (ranges[i].exp == NULL_TREE
3481 || TREE_CODE (ranges[i].exp) != SSA_NAME
3482 || !ranges[i].in_p)
3483 continue;
3485 tree type = TREE_TYPE (ranges[i].exp);
3486 if (!INTEGRAL_TYPE_P (type)
3487 || TYPE_UNSIGNED (type)
3488 || ranges[i].low == NULL_TREE
3489 || !integer_zerop (ranges[i].low)
3490 || ranges[i].high != NULL_TREE)
3491 continue;
3492 /* EXP >= 0 here. */
3493 if (map == NULL)
3494 map = new hash_map <tree, int>;
3495 map->put (ranges[i].exp, i);
3498 if (map == NULL)
3499 return false;
3501 for (i = 0; i < length; i++)
3503 bool in_p = ranges[i].in_p;
3504 if (ranges[i].low == NULL_TREE
3505 || ranges[i].high == NULL_TREE)
3506 continue;
3507 if (!integer_zerop (ranges[i].low)
3508 || !integer_zerop (ranges[i].high))
3510 if (ranges[i].exp
3511 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3512 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3513 && integer_onep (ranges[i].low)
3514 && integer_onep (ranges[i].high))
3515 in_p = !in_p;
3516 else
3517 continue;
3520 gimple *stmt;
3521 tree_code ccode;
3522 tree rhs1, rhs2;
3523 if (ranges[i].exp)
3525 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3526 continue;
3527 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3528 if (!is_gimple_assign (stmt))
3529 continue;
3530 ccode = gimple_assign_rhs_code (stmt);
3531 rhs1 = gimple_assign_rhs1 (stmt);
3532 rhs2 = gimple_assign_rhs2 (stmt);
3534 else
3536 operand_entry *oe = (*ops)[ranges[i].idx];
3537 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3538 if (gimple_code (stmt) != GIMPLE_COND)
3539 continue;
3540 ccode = gimple_cond_code (stmt);
3541 rhs1 = gimple_cond_lhs (stmt);
3542 rhs2 = gimple_cond_rhs (stmt);
3545 if (TREE_CODE (rhs1) != SSA_NAME
3546 || rhs2 == NULL_TREE
3547 || TREE_CODE (rhs2) != SSA_NAME)
3548 continue;
3550 switch (ccode)
3552 case GT_EXPR:
3553 case GE_EXPR:
3554 case LT_EXPR:
3555 case LE_EXPR:
3556 break;
3557 default:
3558 continue;
3560 if (in_p)
3561 ccode = invert_tree_comparison (ccode, false);
3562 switch (ccode)
3564 case GT_EXPR:
3565 case GE_EXPR:
3566 std::swap (rhs1, rhs2);
3567 ccode = swap_tree_comparison (ccode);
3568 break;
3569 case LT_EXPR:
3570 case LE_EXPR:
3571 break;
3572 default:
3573 gcc_unreachable ();
3576 int *idx = map->get (rhs1);
3577 if (idx == NULL)
3578 continue;
3580 /* maybe_optimize_range_tests allows statements without side-effects
3581 in the basic blocks as long as they are consumed in the same bb.
3582 Make sure rhs2's def stmt is not among them, otherwise we can't
3583 use safely get_nonzero_bits on it. E.g. in:
3584 # RANGE [-83, 1] NONZERO 173
3585 # k_32 = PHI <k_47(13), k_12(9)>
3587 if (k_32 >= 0)
3588 goto <bb 5>; [26.46%]
3589 else
3590 goto <bb 9>; [73.54%]
3592 <bb 5> [local count: 140323371]:
3593 # RANGE [0, 1] NONZERO 1
3594 _5 = (int) k_32;
3595 # RANGE [0, 4] NONZERO 4
3596 _21 = _5 << 2;
3597 # RANGE [0, 4] NONZERO 4
3598 iftmp.0_44 = (char) _21;
3599 if (k_32 < iftmp.0_44)
3600 goto <bb 6>; [84.48%]
3601 else
3602 goto <bb 9>; [15.52%]
3603 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3604 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3605 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3606 those stmts even for negative k_32 and the value ranges would be no
3607 longer guaranteed and so the optimization would be invalid. */
3608 while (opcode == ERROR_MARK)
3610 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3611 basic_block bb2 = gimple_bb (g);
3612 if (bb2
3613 && bb2 != first_bb
3614 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3616 /* As an exception, handle a few common cases. */
3617 if (gimple_assign_cast_p (g)
3618 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3620 tree op0 = gimple_assign_rhs1 (g);
3621 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3622 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3623 > TYPE_PRECISION (TREE_TYPE (op0))))
3624 /* Zero-extension is always ok. */
3625 break;
3626 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3627 == TYPE_PRECISION (TREE_TYPE (op0))
3628 && TREE_CODE (op0) == SSA_NAME)
3630 /* Cast from signed to unsigned or vice versa. Retry
3631 with the op0 as new rhs2. */
3632 rhs2 = op0;
3633 continue;
3636 else if (is_gimple_assign (g)
3637 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3638 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3639 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3640 /* Masking with INTEGER_CST with MSB clear is always ok
3641 too. */
3642 break;
3643 rhs2 = NULL_TREE;
3645 break;
3647 if (rhs2 == NULL_TREE)
3648 continue;
3650 wide_int nz = get_nonzero_bits (rhs2);
3651 if (wi::neg_p (nz))
3652 continue;
3654 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3655 and RHS2 is known to be RHS2 >= 0. */
3656 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3658 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3659 if ((ranges[*idx].strict_overflow_p
3660 || ranges[i].strict_overflow_p)
3661 && issue_strict_overflow_warning (wc))
3662 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3663 "assuming signed overflow does not occur "
3664 "when simplifying range test");
3666 if (dump_file && (dump_flags & TDF_DETAILS))
3668 struct range_entry *r = &ranges[*idx];
3669 fprintf (dump_file, "Optimizing range test ");
3670 print_generic_expr (dump_file, r->exp);
3671 fprintf (dump_file, " +[");
3672 print_generic_expr (dump_file, r->low);
3673 fprintf (dump_file, ", ");
3674 print_generic_expr (dump_file, r->high);
3675 fprintf (dump_file, "] and comparison ");
3676 print_generic_expr (dump_file, rhs1);
3677 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3678 print_generic_expr (dump_file, rhs2);
3679 fprintf (dump_file, "\n into (");
3680 print_generic_expr (dump_file, utype);
3681 fprintf (dump_file, ") ");
3682 print_generic_expr (dump_file, rhs1);
3683 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3684 print_generic_expr (dump_file, utype);
3685 fprintf (dump_file, ") ");
3686 print_generic_expr (dump_file, rhs2);
3687 fprintf (dump_file, "\n");
3690 operand_entry *oe = (*ops)[ranges[i].idx];
3691 ranges[i].in_p = 0;
3692 if (opcode == BIT_IOR_EXPR
3693 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3695 ranges[i].in_p = 1;
3696 ccode = invert_tree_comparison (ccode, false);
3699 unsigned int uid = gimple_uid (stmt);
3700 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3701 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3702 gimple_set_uid (g, uid);
3703 rhs1 = gimple_assign_lhs (g);
3704 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3705 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3707 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3708 gimple_set_uid (g, uid);
3709 rhs2 = gimple_assign_lhs (g);
3710 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3712 if (tree_swap_operands_p (rhs1, rhs2))
3714 std::swap (rhs1, rhs2);
3715 ccode = swap_tree_comparison (ccode);
3717 if (gimple_code (stmt) == GIMPLE_COND)
3719 gcond *c = as_a <gcond *> (stmt);
3720 gimple_cond_set_code (c, ccode);
3721 gimple_cond_set_lhs (c, rhs1);
3722 gimple_cond_set_rhs (c, rhs2);
3723 update_stmt (stmt);
3725 else
3727 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3728 if (!INTEGRAL_TYPE_P (ctype)
3729 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3730 && TYPE_PRECISION (ctype) != 1))
3731 ctype = boolean_type_node;
3732 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3733 gimple_set_uid (g, uid);
3734 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3735 if (oe->op && ctype != TREE_TYPE (oe->op))
3737 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3738 NOP_EXPR, gimple_assign_lhs (g));
3739 gimple_set_uid (g, uid);
3740 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3742 ranges[i].exp = gimple_assign_lhs (g);
3743 oe->op = ranges[i].exp;
3744 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3745 ranges[i].high = ranges[i].low;
3747 ranges[i].strict_overflow_p = false;
3748 oe = (*ops)[ranges[*idx].idx];
3749 /* Now change all the other range test immediate uses, so that
3750 those tests will be optimized away. */
3751 if (opcode == ERROR_MARK)
3753 if (oe->op)
3754 oe->op = build_int_cst (TREE_TYPE (oe->op),
3755 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3756 else
3757 oe->op = (oe->rank == BIT_IOR_EXPR
3758 ? boolean_false_node : boolean_true_node);
3760 else
3761 oe->op = error_mark_node;
3762 ranges[*idx].exp = NULL_TREE;
3763 ranges[*idx].low = NULL_TREE;
3764 ranges[*idx].high = NULL_TREE;
3765 any_changes = true;
3768 delete map;
3769 return any_changes;
3772 /* Optimize range tests, similarly how fold_range_test optimizes
3773 it on trees. The tree code for the binary
3774 operation between all the operands is OPCODE.
3775 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3776 maybe_optimize_range_tests for inter-bb range optimization.
3777 In that case if oe->op is NULL, oe->id is bb->index whose
3778 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3779 the actual opcode.
3780 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3782 static bool
3783 optimize_range_tests (enum tree_code opcode,
3784 vec<operand_entry *> *ops, basic_block first_bb)
3786 unsigned int length = ops->length (), i, j, first;
3787 operand_entry *oe;
3788 struct range_entry *ranges;
3789 bool any_changes = false;
3791 if (length == 1)
3792 return false;
3794 ranges = XNEWVEC (struct range_entry, length);
3795 for (i = 0; i < length; i++)
3797 oe = (*ops)[i];
3798 ranges[i].idx = i;
3799 init_range_entry (ranges + i, oe->op,
3800 oe->op
3801 ? NULL
3802 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3803 /* For | invert it now, we will invert it again before emitting
3804 the optimized expression. */
3805 if (opcode == BIT_IOR_EXPR
3806 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3807 ranges[i].in_p = !ranges[i].in_p;
3810 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3811 for (i = 0; i < length; i++)
3812 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3813 break;
3815 /* Try to merge ranges. */
3816 for (first = i; i < length; i++)
3818 tree low = ranges[i].low;
3819 tree high = ranges[i].high;
3820 int in_p = ranges[i].in_p;
3821 bool strict_overflow_p = ranges[i].strict_overflow_p;
3822 int update_fail_count = 0;
3824 for (j = i + 1; j < length; j++)
3826 if (ranges[i].exp != ranges[j].exp)
3827 break;
3828 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3829 ranges[j].in_p, ranges[j].low, ranges[j].high))
3830 break;
3831 strict_overflow_p |= ranges[j].strict_overflow_p;
3834 if (j == i + 1)
3835 continue;
3837 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3838 opcode, ops, ranges[i].exp, NULL, in_p,
3839 low, high, strict_overflow_p))
3841 i = j - 1;
3842 any_changes = true;
3844 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3845 while update_range_test would fail. */
3846 else if (update_fail_count == 64)
3847 i = j - 1;
3848 else
3849 ++update_fail_count;
3852 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3853 ops, ranges);
3855 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3856 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3857 ops, ranges);
3858 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3859 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3860 ops, ranges);
3861 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3862 ops, ranges);
3863 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3864 ranges, first_bb);
3866 if (any_changes && opcode != ERROR_MARK)
3868 j = 0;
3869 FOR_EACH_VEC_ELT (*ops, i, oe)
3871 if (oe->op == error_mark_node)
3872 continue;
3873 else if (i != j)
3874 (*ops)[j] = oe;
3875 j++;
3877 ops->truncate (j);
3880 XDELETEVEC (ranges);
3881 return any_changes;
3884 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3885 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3886 otherwise the comparison code. TYPE is a return value that is set
3887 to type of comparison. */
3889 static tree_code
3890 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
3891 tree *lhs, tree *rhs, gassign **vcond)
3893 if (TREE_CODE (var) != SSA_NAME)
3894 return ERROR_MARK;
3896 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3897 if (stmt == NULL)
3898 return ERROR_MARK;
3899 if (vcond)
3900 *vcond = stmt;
3902 /* ??? If we start creating more COND_EXPR, we could perform
3903 this same optimization with them. For now, simplify. */
3904 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3905 return ERROR_MARK;
3907 tree cond = gimple_assign_rhs1 (stmt);
3908 tree_code cmp = TREE_CODE (cond);
3909 if (cmp != SSA_NAME)
3910 return ERROR_MARK;
3912 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
3913 if (assign == NULL
3914 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
3915 return ERROR_MARK;
3917 cmp = gimple_assign_rhs_code (assign);
3918 if (lhs)
3919 *lhs = gimple_assign_rhs1 (assign);
3920 if (rhs)
3921 *rhs = gimple_assign_rhs2 (assign);
3923 /* ??? For now, allow only canonical true and false result vectors.
3924 We could expand this to other constants should the need arise,
3925 but at the moment we don't create them. */
3926 tree t = gimple_assign_rhs2 (stmt);
3927 tree f = gimple_assign_rhs3 (stmt);
3928 bool inv;
3929 if (integer_all_onesp (t))
3930 inv = false;
3931 else if (integer_all_onesp (f))
3933 cmp = invert_tree_comparison (cmp, false);
3934 inv = true;
3936 else
3937 return ERROR_MARK;
3938 if (!integer_zerop (f))
3939 return ERROR_MARK;
3941 /* Success! */
3942 if (rets)
3943 *rets = assign;
3944 if (reti)
3945 *reti = inv;
3946 if (type)
3947 *type = TREE_TYPE (cond);
3948 return cmp;
3951 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3952 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3954 static bool
3955 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3957 unsigned int length = ops->length (), i, j;
3958 bool any_changes = false;
3960 if (length == 1)
3961 return false;
3963 for (i = 0; i < length; ++i)
3965 tree elt0 = (*ops)[i]->op;
3967 gassign *stmt0, *vcond0;
3968 bool invert;
3969 tree type, lhs0, rhs0;
3970 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
3971 &rhs0, &vcond0);
3972 if (cmp0 == ERROR_MARK)
3973 continue;
3975 for (j = i + 1; j < length; ++j)
3977 tree &elt1 = (*ops)[j]->op;
3979 gassign *stmt1, *vcond1;
3980 tree lhs1, rhs1;
3981 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
3982 &rhs1, &vcond1);
3983 if (cmp1 == ERROR_MARK)
3984 continue;
3986 tree comb;
3987 if (opcode == BIT_AND_EXPR)
3988 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
3989 cmp1, lhs1, rhs1);
3990 else if (opcode == BIT_IOR_EXPR)
3991 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
3992 cmp1, lhs1, rhs1);
3993 else
3994 gcc_unreachable ();
3995 if (comb == NULL)
3996 continue;
3998 /* Success! */
3999 if (dump_file && (dump_flags & TDF_DETAILS))
4001 fprintf (dump_file, "Transforming ");
4002 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4003 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4004 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4005 fprintf (dump_file, " into ");
4006 print_generic_expr (dump_file, comb);
4007 fputc ('\n', dump_file);
4010 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4011 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4012 true, GSI_SAME_STMT);
4013 if (invert)
4014 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4015 gimple_assign_rhs3_ptr (vcond0));
4016 gimple_assign_set_rhs1 (vcond0, exp);
4017 update_stmt (vcond0);
4019 elt1 = error_mark_node;
4020 any_changes = true;
4024 if (any_changes)
4026 operand_entry *oe;
4027 j = 0;
4028 FOR_EACH_VEC_ELT (*ops, i, oe)
4030 if (oe->op == error_mark_node)
4031 continue;
4032 else if (i != j)
4033 (*ops)[j] = oe;
4034 j++;
4036 ops->truncate (j);
4039 return any_changes;
4042 /* Return true if STMT is a cast like:
4043 <bb N>:
4045 _123 = (int) _234;
4047 <bb M>:
4048 # _345 = PHI <_123(N), 1(...), 1(...)>
4049 where _234 has bool type, _123 has single use and
4050 bb N has a single successor M. This is commonly used in
4051 the last block of a range test.
4053 Also Return true if STMT is tcc_compare like:
4054 <bb N>:
4056 _234 = a_2(D) == 2;
4058 <bb M>:
4059 # _345 = PHI <_234(N), 1(...), 1(...)>
4060 _346 = (int) _345;
4061 where _234 has booltype, single use and
4062 bb N has a single successor M. This is commonly used in
4063 the last block of a range test. */
4065 static bool
4066 final_range_test_p (gimple *stmt)
4068 basic_block bb, rhs_bb, lhs_bb;
4069 edge e;
4070 tree lhs, rhs;
4071 use_operand_p use_p;
4072 gimple *use_stmt;
4074 if (!gimple_assign_cast_p (stmt)
4075 && (!is_gimple_assign (stmt)
4076 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4077 != tcc_comparison)))
4078 return false;
4079 bb = gimple_bb (stmt);
4080 if (!single_succ_p (bb))
4081 return false;
4082 e = single_succ_edge (bb);
4083 if (e->flags & EDGE_COMPLEX)
4084 return false;
4086 lhs = gimple_assign_lhs (stmt);
4087 rhs = gimple_assign_rhs1 (stmt);
4088 if (gimple_assign_cast_p (stmt)
4089 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4090 || TREE_CODE (rhs) != SSA_NAME
4091 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4092 return false;
4094 if (!gimple_assign_cast_p (stmt)
4095 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4096 return false;
4098 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4099 if (!single_imm_use (lhs, &use_p, &use_stmt))
4100 return false;
4102 if (gimple_code (use_stmt) != GIMPLE_PHI
4103 || gimple_bb (use_stmt) != e->dest)
4104 return false;
4106 /* And that the rhs is defined in the same loop. */
4107 if (gimple_assign_cast_p (stmt))
4109 if (TREE_CODE (rhs) != SSA_NAME
4110 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4111 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4112 return false;
4114 else
4116 if (TREE_CODE (lhs) != SSA_NAME
4117 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4118 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4119 return false;
4122 return true;
4125 /* Return true if BB is suitable basic block for inter-bb range test
4126 optimization. If BACKWARD is true, BB should be the only predecessor
4127 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4128 or compared with to find a common basic block to which all conditions
4129 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4130 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4131 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4132 block points to an empty block that falls through into *OTHER_BB and
4133 the phi args match that path. */
4135 static bool
4136 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4137 bool *test_swapped_p, bool backward)
4139 edge_iterator ei, ei2;
4140 edge e, e2;
4141 gimple *stmt;
4142 gphi_iterator gsi;
4143 bool other_edge_seen = false;
4144 bool is_cond;
4146 if (test_bb == bb)
4147 return false;
4148 /* Check last stmt first. */
4149 stmt = last_stmt (bb);
4150 if (stmt == NULL
4151 || (gimple_code (stmt) != GIMPLE_COND
4152 && (backward || !final_range_test_p (stmt)))
4153 || gimple_visited_p (stmt)
4154 || stmt_could_throw_p (cfun, stmt)
4155 || *other_bb == bb)
4156 return false;
4157 is_cond = gimple_code (stmt) == GIMPLE_COND;
4158 if (is_cond)
4160 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4161 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4162 to *OTHER_BB (if not set yet, try to find it out). */
4163 if (EDGE_COUNT (bb->succs) != 2)
4164 return false;
4165 FOR_EACH_EDGE (e, ei, bb->succs)
4167 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4168 return false;
4169 if (e->dest == test_bb)
4171 if (backward)
4172 continue;
4173 else
4174 return false;
4176 if (e->dest == bb)
4177 return false;
4178 if (*other_bb == NULL)
4180 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4181 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4182 return false;
4183 else if (e->dest == e2->dest)
4184 *other_bb = e->dest;
4185 if (*other_bb == NULL)
4186 return false;
4188 if (e->dest == *other_bb)
4189 other_edge_seen = true;
4190 else if (backward)
4191 return false;
4193 if (*other_bb == NULL || !other_edge_seen)
4194 return false;
4196 else if (single_succ (bb) != *other_bb)
4197 return false;
4199 /* Now check all PHIs of *OTHER_BB. */
4200 e = find_edge (bb, *other_bb);
4201 e2 = find_edge (test_bb, *other_bb);
4202 retry:;
4203 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4205 gphi *phi = gsi.phi ();
4206 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4207 corresponding to BB and TEST_BB predecessor must be the same. */
4208 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4209 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4211 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4212 one of the PHIs should have the lhs of the last stmt in
4213 that block as PHI arg and that PHI should have 0 or 1
4214 corresponding to it in all other range test basic blocks
4215 considered. */
4216 if (!is_cond)
4218 if (gimple_phi_arg_def (phi, e->dest_idx)
4219 == gimple_assign_lhs (stmt)
4220 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4221 || integer_onep (gimple_phi_arg_def (phi,
4222 e2->dest_idx))))
4223 continue;
4225 else
4227 gimple *test_last = last_stmt (test_bb);
4228 if (gimple_code (test_last) == GIMPLE_COND)
4230 if (backward ? e2->src != test_bb : e->src != bb)
4231 return false;
4233 /* For last_bb, handle also:
4234 if (x_3(D) == 3)
4235 goto <bb 6>; [34.00%]
4236 else
4237 goto <bb 7>; [66.00%]
4239 <bb 6> [local count: 79512730]:
4241 <bb 7> [local count: 1073741824]:
4242 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4243 where bb 7 is *OTHER_BB, but the PHI values from the
4244 earlier bbs match the path through the empty bb
4245 in between. */
4246 edge e3;
4247 if (backward)
4248 e3 = EDGE_SUCC (test_bb,
4249 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4250 else
4251 e3 = EDGE_SUCC (bb,
4252 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4253 if (empty_block_p (e3->dest)
4254 && single_succ_p (e3->dest)
4255 && single_succ (e3->dest) == *other_bb
4256 && single_pred_p (e3->dest)
4257 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4259 if (backward)
4260 e2 = single_succ_edge (e3->dest);
4261 else
4262 e = single_succ_edge (e3->dest);
4263 if (test_swapped_p)
4264 *test_swapped_p = true;
4265 goto retry;
4268 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4269 == gimple_assign_lhs (test_last)
4270 && (integer_zerop (gimple_phi_arg_def (phi,
4271 e->dest_idx))
4272 || integer_onep (gimple_phi_arg_def (phi,
4273 e->dest_idx))))
4274 continue;
4277 return false;
4280 return true;
4283 /* Return true if BB doesn't have side-effects that would disallow
4284 range test optimization, all SSA_NAMEs set in the bb are consumed
4285 in the bb and there are no PHIs. */
4287 static bool
4288 no_side_effect_bb (basic_block bb)
4290 gimple_stmt_iterator gsi;
4291 gimple *last;
4293 if (!gimple_seq_empty_p (phi_nodes (bb)))
4294 return false;
4295 last = last_stmt (bb);
4296 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4298 gimple *stmt = gsi_stmt (gsi);
4299 tree lhs;
4300 imm_use_iterator imm_iter;
4301 use_operand_p use_p;
4303 if (is_gimple_debug (stmt))
4304 continue;
4305 if (gimple_has_side_effects (stmt))
4306 return false;
4307 if (stmt == last)
4308 return true;
4309 if (!is_gimple_assign (stmt))
4310 return false;
4311 lhs = gimple_assign_lhs (stmt);
4312 if (TREE_CODE (lhs) != SSA_NAME)
4313 return false;
4314 if (gimple_assign_rhs_could_trap_p (stmt))
4315 return false;
4316 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4318 gimple *use_stmt = USE_STMT (use_p);
4319 if (is_gimple_debug (use_stmt))
4320 continue;
4321 if (gimple_bb (use_stmt) != bb)
4322 return false;
4325 return false;
4328 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4329 return true and fill in *OPS recursively. */
4331 static bool
4332 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4333 class loop *loop)
4335 gimple *stmt = SSA_NAME_DEF_STMT (var);
4336 tree rhs[2];
4337 int i;
4339 if (!is_reassociable_op (stmt, code, loop))
4340 return false;
4342 rhs[0] = gimple_assign_rhs1 (stmt);
4343 rhs[1] = gimple_assign_rhs2 (stmt);
4344 gimple_set_visited (stmt, true);
4345 for (i = 0; i < 2; i++)
4346 if (TREE_CODE (rhs[i]) == SSA_NAME
4347 && !get_ops (rhs[i], code, ops, loop)
4348 && has_single_use (rhs[i]))
4350 operand_entry *oe = operand_entry_pool.allocate ();
4352 oe->op = rhs[i];
4353 oe->rank = code;
4354 oe->id = 0;
4355 oe->count = 1;
4356 oe->stmt_to_insert = NULL;
4357 ops->safe_push (oe);
4359 return true;
4362 /* Find the ops that were added by get_ops starting from VAR, see if
4363 they were changed during update_range_test and if yes, create new
4364 stmts. */
4366 static tree
4367 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4368 unsigned int *pidx, class loop *loop)
4370 gimple *stmt = SSA_NAME_DEF_STMT (var);
4371 tree rhs[4];
4372 int i;
4374 if (!is_reassociable_op (stmt, code, loop))
4375 return NULL;
4377 rhs[0] = gimple_assign_rhs1 (stmt);
4378 rhs[1] = gimple_assign_rhs2 (stmt);
4379 rhs[2] = rhs[0];
4380 rhs[3] = rhs[1];
4381 for (i = 0; i < 2; i++)
4382 if (TREE_CODE (rhs[i]) == SSA_NAME)
4384 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4385 if (rhs[2 + i] == NULL_TREE)
4387 if (has_single_use (rhs[i]))
4388 rhs[2 + i] = ops[(*pidx)++]->op;
4389 else
4390 rhs[2 + i] = rhs[i];
4393 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4394 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4396 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4397 var = make_ssa_name (TREE_TYPE (var));
4398 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4399 rhs[2], rhs[3]);
4400 gimple_set_uid (g, gimple_uid (stmt));
4401 gimple_set_visited (g, true);
4402 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4404 return var;
4407 /* Structure to track the initial value passed to get_ops and
4408 the range in the ops vector for each basic block. */
4410 struct inter_bb_range_test_entry
4412 tree op;
4413 unsigned int first_idx, last_idx;
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise return FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */

static bool
maybe_optimize_range_tests (gimple *stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry *> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;
  bool cfg_cleanup_needed = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
	return cfg_cleanup_needed;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return cfg_cleanup_needed;

  if (stmt_could_throw_p (cfun, stmt))
    return cfg_cleanup_needed;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
	break;
      if (!no_side_effect_bb (first_bb))
	break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return cfg_cleanup_needed;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return cfg_cleanup_needed;
	else if (single_pred_p (e->dest))
	  {
	    stmt = last_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
	      {
		if (suitable_cond_bb (first_bb, e->dest, &other_bb,
				      NULL, true))
		  break;
		else
		  other_bb = NULL;
	      }
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
	      {
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
		  other_bb = NULL;
	      }
	  }
      if (other_bb == NULL)
	return cfg_cleanup_needed;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
	  break;
      if (e == NULL)
	break;
      if (!single_pred_p (e->dest))
	break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
	break;
      if (!no_side_effect_bb (e->dest))
	break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return cfg_cleanup_needed;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	{
	  use_operand_p use_p;
	  gimple *phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is
	     _123 = (int) _234;
	     OR
	     _234 = a_2(D) == 2;

	     followed by:
	     <bb M>:
	     # _345 = PHI <_123(N), 1(...), 1(...)>

	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	    {
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;
	    }

	  /* If _234 SSA_NAME_DEF_STMT is
	     _234 = _567 | _789;
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (TREE_CODE (rhs) == SSA_NAME
	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		  != tcc_comparison)
	      && !get_ops (rhs, code, &ops,
			   loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	    {
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry *oe = operand_entry_pool.allocate ();

	      oe->op = rhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      oe->stmt_to_insert = NULL;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	      bb_ent.op = rhs;
	    }
	  else if (is_gimple_assign (stmt)
		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		       == tcc_comparison)
		   && !get_ops (lhs, code, &ops,
				loop_containing_stmt (stmt))
		   && has_single_use (lhs))
	    {
	      /* NOTE(review): unlike the branch above, oe->stmt_to_insert
		 is not initialized here and the pool allocator does not
		 zero memory — confirm no consumer reads stmt_to_insert
		 for this entry before assuming this is safe.  */
	      operand_entry *oe = operand_entry_pool.allocate ();
	      oe->op = lhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	      bb_ent.op = lhs;
	    }
	  else
	    {
	      bb_ent.last_idx = ops.length ();
	      bb_ent.op = rhs;
	    }
	  bbinfo.safe_push (bb_ent);
	  continue;
	}
      else if (bb == last_bb)
	{
	  /* For last_bb, handle also:
	     if (x_3(D) == 3)
	       goto <bb 6>; [34.00%]
	     else
	       goto <bb 7>; [66.00%]

	     <bb 6> [local count: 79512730]:

	     <bb 7> [local count: 1073741824]:
	     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
	     where bb 7 is OTHER_BB, but the PHI values from the
	     earlier bbs match the path through the empty bb
	     in between.  */
	  bool test_swapped_p = false;
	  bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
				      &other_bb, &test_swapped_p, true);
	  gcc_assert (ok);
	  if (test_swapped_p)
	    e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
	}
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	{
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry *oe = operand_entry_pool.allocate ();

	  oe->op = NULL;
	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at which's end the GIMPLE_COND
	     is.  */
	  oe->id = bb->index;
	  oe->count = 1;
	  oe->stmt_to_insert = NULL;
	  ops.safe_push (oe);
	  bb_ent.op = NULL;
	  bb_ent.last_idx++;
	}
      else if (ops.length () > bb_ent.first_idx)
	{
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
	}
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
	break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
  if (any_changes)
    {
      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	    {
	      tree new_op;

	      max_idx = idx;
	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		{
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
		}
	      if (bbinfo[idx].op != new_op)
		{
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		      continue;
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if ((is_gimple_assign (use_stmt)
			      && (TREE_CODE_CLASS
				  (gimple_assign_rhs_code (use_stmt))
				  == tcc_comparison)))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else
		      gcc_unreachable ();

		  if (cast_or_tcc_cmp_stmt)
		    {
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
			  ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
			  : CONVERT_EXPR;
		      gassign *g;
		      if (is_gimple_min_invariant (new_op))
			{
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			}
		      else
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi
			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			  continue;
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
			else
			  gcc_unreachable ();
		    }
		}
	    }
	  if (bb == first_bb)
	    break;
	}
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	    {
	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));

	      if (idx > max_idx)
		max_idx = idx;

	      /* If we collapse the conditional to a true/false
		 condition, then bubble that knowledge up to our caller.  */
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		{
		  gimple_cond_make_false (cond_stmt);
		  cfg_cleanup_needed = true;
		}
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		{
		  gimple_cond_make_true (cond_stmt);
		  cfg_cleanup_needed = true;
		}
	      else
		{
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
		}
	      update_stmt (cond_stmt);
	    }
	  if (bb == first_bb)
	    break;
	}

      /* The above changes could result in basic blocks after the first
	 modified one, up to and including last_bb, to be executed even if
	 they would not be in the original program.  If the value ranges of
	 assignment lhs' in those bbs were dependent on the conditions
	 guarding those basic blocks which now can change, the VRs might
	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
	 are only used within the same bb, it should be not a big deal if
	 we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
	reset_flow_sensitive_info_in_bb (bb);
    }
  return cfg_cleanup_needed;
}
4838 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4839 of STMT in it's operands. This is also known as a "destructive
4840 update" operation. */
4842 static bool
4843 is_phi_for_stmt (gimple *stmt, tree operand)
4845 gimple *def_stmt;
4846 gphi *def_phi;
4847 tree lhs;
4848 use_operand_p arg_p;
4849 ssa_op_iter i;
4851 if (TREE_CODE (operand) != SSA_NAME)
4852 return false;
4854 lhs = gimple_assign_lhs (stmt);
4856 def_stmt = SSA_NAME_DEF_STMT (operand);
4857 def_phi = dyn_cast <gphi *> (def_stmt);
4858 if (!def_phi)
4859 return false;
4861 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4862 if (lhs == USE_FROM_PTR (arg_p))
4863 return true;
4864 return false;
4867 /* Remove def stmt of VAR if VAR has zero uses and recurse
4868 on rhs1 operand if so. */
4870 static void
4871 remove_visited_stmt_chain (tree var)
4873 gimple *stmt;
4874 gimple_stmt_iterator gsi;
4876 while (1)
4878 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4879 return;
4880 stmt = SSA_NAME_DEF_STMT (var);
4881 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4883 var = gimple_assign_rhs1 (stmt);
4884 gsi = gsi_for_stmt (stmt);
4885 reassoc_remove_stmt (&gsi);
4886 release_defs (stmt);
4888 else
4889 return;
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */

static void
swap_ops_for_binary_stmt (vec<operand_entry *> ops,
			  unsigned int opindex, gimple *stmt)
{
  operand_entry *oe1, *oe2, *oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  /* Move oe3 to the front if oe1/oe2 already share a rank, or if oe3
     (and only oe3) is the PHI of a destructive update of STMT.  */
  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
    std::swap (*oe1, *oe3);
  /* Otherwise move oe2 to the front under the symmetric conditions.  */
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
    std::swap (*oe1, *oe2);
}
4937 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4938 two definitions, otherwise return STMT. */
4940 static inline gimple *
4941 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4943 if (TREE_CODE (rhs1) == SSA_NAME
4944 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4945 stmt = SSA_NAME_DEF_STMT (rhs1);
4946 if (TREE_CODE (rhs2) == SSA_NAME
4947 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4948 stmt = SSA_NAME_DEF_STMT (rhs2);
4949 return stmt;
4952 /* If the stmt that defines operand has to be inserted, insert it
4953 before the use. */
4954 static void
4955 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4957 gcc_assert (is_gimple_assign (stmt_to_insert));
4958 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4959 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4960 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4961 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4962 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4964 /* If the insert point is not stmt, then insert_point would be
4965 the point where operand rhs1 or rhs2 is defined. In this case,
4966 stmt_to_insert has to be inserted afterwards. This would
4967 only happen when the stmt insertion point is flexible. */
4968 if (stmt == insert_point)
4969 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4970 else
4971 insert_stmt_after (stmt_to_insert, insert_point);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
		   vec<operand_entry *> ops, bool changed, bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, rhs_code,
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      /* In-place update is only valid when STMT already sits at
		 a legal insertion point for the new operands.  */
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  /* The old rhs1 chain may have become dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,
			 false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, rhs_code,
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
    }
  return lhs;
}
5128 /* Find out how many cycles we need to compute statements chain.
5129 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
5130 maximum number of independent statements we may execute per cycle. */
5132 static int
5133 get_required_cycles (int ops_num, int cpu_width)
5135 int res;
5136 int elog;
5137 unsigned int rest;
5139 /* While we have more than 2 * cpu_width operands
5140 we may reduce number of operands by cpu_width
5141 per cycle. */
5142 res = ops_num / (2 * cpu_width);
5144 /* Remained operands count may be reduced twice per cycle
5145 until we have only one operand. */
5146 rest = (unsigned)(ops_num - res * cpu_width);
5147 elog = exact_log2 (rest);
5148 if (elog >= 0)
5149 res += elog;
5150 else
5151 res += floor_log2 (rest) + 1;
5153 return res;
5156 /* Returns an optimal number of registers to use for computation of
5157 given statements. */
5159 static int
5160 get_reassociation_width (int ops_num, enum tree_code opc,
5161 machine_mode mode)
5163 int param_width = param_tree_reassoc_width;
5164 int width;
5165 int width_min;
5166 int cycles_best;
5168 if (param_width > 0)
5169 width = param_width;
5170 else
5171 width = targetm.sched.reassociation_width (opc, mode);
5173 if (width == 1)
5174 return width;
5176 /* Get the minimal time required for sequence computation. */
5177 cycles_best = get_required_cycles (ops_num, width);
5179 /* Check if we may use less width and still compute sequence for
5180 the same time. It will allow us to reduce registers usage.
5181 get_required_cycles is monotonically increasing with lower width
5182 so we can perform a binary search for the minimal width that still
5183 results in the optimal cycle count. */
5184 width_min = 1;
5185 while (width > width_min)
5187 int width_mid = (width + width_min) / 2;
5189 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5190 width = width_mid;
5191 else if (width_min < width_mid)
5192 width_min = width_mid;
5193 else
5194 break;
5197 return width;
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry *> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  /* N operands need N-1 binary statements to combine.  */
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  /* Non-zero means results of already-emitted statements should be
     consumed as operands from STMTS[stmt_index] onward.  */
  int ready_stmts_end = 0;
  int i = 0;
  gimple *stmt1 = NULL, *stmt2 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    {
	      operand_entry *oe = ops[op_index--];
	      stmt2 = oe->stmt_to_insert;
	      op2 = oe->op;
	    }
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  /* Still consuming original operands: take the next two,
	     possibly swapping for better pairing first.  */
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  operand_entry *oe2 = ops[op_index--];
	  operand_entry *oe1 = ops[op_index--];
	  op2 = oe2->op;
	  stmt2 = oe2->stmt_to_insert;
	  op1 = oe1->op;
	  stmt1 = oe1->stmt_to_insert;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 break the loop.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}

      /* If the stmt that defines operand has to be inserted, insert it
	 before the use.  */
      if (stmt1)
	insert_stmt_before_use (stmts[i], stmt1);
      if (stmt2)
	insert_stmt_before_use (stmts[i], stmt2);
      stmt1 = stmt2 = NULL;

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	{
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
	  gimple_set_visited (stmts[i], true);
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}
    }

  /* The original operand chain may now be dead; clean it up.  */
  remove_visited_stmt_chain (last_rhs1);
}
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* Rotate: STMT becomes ((A+B)+C) + D, with a fresh stmt computing
     (A+B)+C inserted before STMT.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The original rhs statement has been replaced; remove it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
5373 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5374 it. Otherwise, return NULL. */
5376 static gimple *
5377 get_single_immediate_use (tree lhs)
5379 use_operand_p immuse;
5380 gimple *immusestmt;
5382 if (TREE_CODE (lhs) == SSA_NAME
5383 && single_imm_use (lhs, &immuse, &immusestmt)
5384 && is_gimple_assign (immusestmt))
5385 return immusestmt;
5387 return NULL;
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      /* The iterator is re-fetched after each recursive call because
	 the recursion may have inserted statements at this position.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  /* Give the statements force_gimple_operand_gsi just inserted
     (they have uid 0) the uid of the insertion point.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
5450 /* Return true if we should break up the subtract in STMT into an add
5451 with negate. This is true when we the subtract operands are really
5452 adds, or the subtract itself is used in an add expression. In
5453 either case, breaking up the subtract into an add with negate
5454 exposes the adds to reassociation. */
5456 static bool
5457 should_break_up_subtract (gimple *stmt)
5459 tree lhs = gimple_assign_lhs (stmt);
5460 tree binlhs = gimple_assign_rhs1 (stmt);
5461 tree binrhs = gimple_assign_rhs2 (stmt);
5462 gimple *immusestmt;
5463 class loop *loop = loop_containing_stmt (stmt);
5465 if (TREE_CODE (binlhs) == SSA_NAME
5466 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5467 return true;
5469 if (TREE_CODE (binrhs) == SSA_NAME
5470 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5471 return true;
5473 if (TREE_CODE (lhs) == SSA_NAME
5474 && (immusestmt = get_single_immediate_use (lhs))
5475 && is_gimple_assign (immusestmt)
5476 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5477 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5478 && gimple_assign_rhs1 (immusestmt) == lhs)
5479 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5480 return true;
5481 return false;
5484 /* Transform STMT from A - B into A + -B. */
5486 static void
5487 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5489 tree rhs1 = gimple_assign_rhs1 (stmt);
5490 tree rhs2 = gimple_assign_rhs2 (stmt);
5492 if (dump_file && (dump_flags & TDF_DETAILS))
5494 fprintf (dump_file, "Breaking up subtract ");
5495 print_gimple_stmt (dump_file, stmt, 0);
5498 rhs2 = negate_value (rhs2, gsip);
5499 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5500 update_stmt (stmt);
5503 /* Determine whether STMT is a builtin call that raises an SSA name
5504 to an integer power and has only one use. If so, and this is early
5505 reassociation and unsafe math optimizations are permitted, place
5506 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5507 If any of these conditions does not hold, return FALSE. */
5509 static bool
5510 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5512 tree arg1;
5513 REAL_VALUE_TYPE c, cint;
5515 switch (gimple_call_combined_fn (stmt))
5517 CASE_CFN_POW:
5518 if (flag_errno_math)
5519 return false;
5521 *base = gimple_call_arg (stmt, 0);
5522 arg1 = gimple_call_arg (stmt, 1);
5524 if (TREE_CODE (arg1) != REAL_CST)
5525 return false;
5527 c = TREE_REAL_CST (arg1);
5529 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5530 return false;
5532 *exponent = real_to_integer (&c);
5533 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5534 if (!real_identical (&c, &cint))
5535 return false;
5537 break;
5539 CASE_CFN_POWI:
5540 *base = gimple_call_arg (stmt, 0);
5541 arg1 = gimple_call_arg (stmt, 1);
5543 if (!tree_fits_shwi_p (arg1))
5544 return false;
5546 *exponent = tree_to_shwi (arg1);
5547 break;
5549 default:
5550 return false;
5553 /* Expanding negative exponents is generally unproductive, so we don't
5554 complicate matters with those. Exponents of zero and one should
5555 have been handled by expression folding. */
5556 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5557 return false;
5559 return true;
5562 /* Try to derive and add operand entry for OP to *OPS. Return false if
5563 unsuccessful. */
5565 static bool
5566 try_special_add_to_ops (vec<operand_entry *> *ops,
5567 enum tree_code code,
5568 tree op, gimple* def_stmt)
5570 tree base = NULL_TREE;
5571 HOST_WIDE_INT exponent = 0;
5573 if (TREE_CODE (op) != SSA_NAME
5574 || ! has_single_use (op))
5575 return false;
5577 if (code == MULT_EXPR
5578 && reassoc_insert_powi_p
5579 && flag_unsafe_math_optimizations
5580 && is_gimple_call (def_stmt)
5581 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5583 add_repeat_to_ops_vec (ops, base, exponent);
5584 gimple_set_visited (def_stmt, true);
5585 return true;
5587 else if (code == MULT_EXPR
5588 && is_gimple_assign (def_stmt)
5589 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5590 && !HONOR_SNANS (TREE_TYPE (op))
5591 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5592 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5594 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5595 tree cst = build_minus_one_cst (TREE_TYPE (op));
5596 add_to_ops_vec (ops, rhs1);
5597 add_to_ops_vec (ops, cst);
5598 gimple_set_visited (def_stmt, true);
5599 return true;
5602 return false;
5605 /* Recursively linearize a binary expression that is the RHS of STMT.
5606 Place the operands of the expression tree in the vector named OPS. */
5608 static void
5609 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5610 bool is_associative, bool set_visited)
5612 tree binlhs = gimple_assign_rhs1 (stmt);
5613 tree binrhs = gimple_assign_rhs2 (stmt);
5614 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5615 bool binlhsisreassoc = false;
5616 bool binrhsisreassoc = false;
5617 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5618 class loop *loop = loop_containing_stmt (stmt);
5620 if (set_visited)
5621 gimple_set_visited (stmt, true);
5623 if (TREE_CODE (binlhs) == SSA_NAME)
5625 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5626 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5627 && !stmt_could_throw_p (cfun, binlhsdef));
5630 if (TREE_CODE (binrhs) == SSA_NAME)
5632 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5633 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5634 && !stmt_could_throw_p (cfun, binrhsdef));
5637 /* If the LHS is not reassociable, but the RHS is, we need to swap
5638 them. If neither is reassociable, there is nothing we can do, so
5639 just put them in the ops vector. If the LHS is reassociable,
5640 linearize it. If both are reassociable, then linearize the RHS
5641 and the LHS. */
5643 if (!binlhsisreassoc)
5645 /* If this is not a associative operation like division, give up. */
5646 if (!is_associative)
5648 add_to_ops_vec (ops, binrhs);
5649 return;
5652 if (!binrhsisreassoc)
5654 bool swap = false;
5655 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5656 /* If we add ops for the rhs we expect to be able to recurse
5657 to it via the lhs during expression rewrite so swap
5658 operands. */
5659 swap = true;
5660 else
5661 add_to_ops_vec (ops, binrhs);
5663 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5664 add_to_ops_vec (ops, binlhs);
5666 if (!swap)
5667 return;
5670 if (dump_file && (dump_flags & TDF_DETAILS))
5672 fprintf (dump_file, "swapping operands of ");
5673 print_gimple_stmt (dump_file, stmt, 0);
5676 swap_ssa_operands (stmt,
5677 gimple_assign_rhs1_ptr (stmt),
5678 gimple_assign_rhs2_ptr (stmt));
5679 update_stmt (stmt);
5681 if (dump_file && (dump_flags & TDF_DETAILS))
5683 fprintf (dump_file, " is now ");
5684 print_gimple_stmt (dump_file, stmt, 0);
5686 if (!binrhsisreassoc)
5687 return;
5689 /* We want to make it so the lhs is always the reassociative op,
5690 so swap. */
5691 std::swap (binlhs, binrhs);
5693 else if (binrhsisreassoc)
5695 linearize_expr (stmt);
5696 binlhs = gimple_assign_rhs1 (stmt);
5697 binrhs = gimple_assign_rhs2 (stmt);
5700 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5701 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5702 rhscode, loop));
5703 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5704 is_associative, set_visited);
5706 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5707 add_to_ops_vec (ops, binrhs);
5710 /* Repropagate the negates back into subtracts, since no other pass
5711 currently does it. */
5713 static void
5714 repropagate_negates (void)
5716 unsigned int i = 0;
5717 tree negate;
5719 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5721 gimple *user = get_single_immediate_use (negate);
5723 if (!user || !is_gimple_assign (user))
5724 continue;
5726 /* The negate operand can be either operand of a PLUS_EXPR
5727 (it can be the LHS if the RHS is a constant for example).
5729 Force the negate operand to the RHS of the PLUS_EXPR, then
5730 transform the PLUS_EXPR into a MINUS_EXPR. */
5731 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5733 /* If the negated operand appears on the LHS of the
5734 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5735 to force the negated operand to the RHS of the PLUS_EXPR. */
5736 if (gimple_assign_rhs1 (user) == negate)
5738 swap_ssa_operands (user,
5739 gimple_assign_rhs1_ptr (user),
5740 gimple_assign_rhs2_ptr (user));
5743 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5744 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5745 if (gimple_assign_rhs2 (user) == negate)
5747 tree rhs1 = gimple_assign_rhs1 (user);
5748 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5749 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5750 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5751 update_stmt (user);
5754 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5756 if (gimple_assign_rhs1 (user) == negate)
5758 /* We have
5759 x = -a
5760 y = x - b
5761 which we transform into
5762 x = a + b
5763 y = -x .
5764 This pushes down the negate which we possibly can merge
5765 into some other operation, hence insert it into the
5766 plus_negates vector. */
5767 gimple *feed = SSA_NAME_DEF_STMT (negate);
5768 tree a = gimple_assign_rhs1 (feed);
5769 tree b = gimple_assign_rhs2 (user);
5770 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5771 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5772 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5773 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5774 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5775 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5776 user = gsi_stmt (gsi2);
5777 update_stmt (user);
5778 reassoc_remove_stmt (&gsi);
5779 release_defs (feed);
5780 plus_negates.safe_push (gimple_assign_lhs (user));
5782 else
5784 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5785 rid of one operation. */
5786 gimple *feed = SSA_NAME_DEF_STMT (negate);
5787 tree a = gimple_assign_rhs1 (feed);
5788 tree rhs1 = gimple_assign_rhs1 (user);
5789 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5790 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5791 update_stmt (gsi_stmt (gsi));
5797 /* Returns true if OP is of a type for which we can do reassociation.
5798 That is for integral or non-saturating fixed-point types, and for
5799 floating point type when associative-math is enabled. */
5801 static bool
5802 can_reassociate_p (tree op)
5804 tree type = TREE_TYPE (op);
5805 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5806 return false;
5807 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5808 || NON_SAT_FIXED_POINT_TYPE_P (type)
5809 || (flag_associative_math && FLOAT_TYPE_P (type)))
5810 return true;
5811 return false;
5814 /* Break up subtract operations in block BB.
5816 We do this top down because we don't know whether the subtract is
5817 part of a possible chain of reassociation except at the top.
5819 IE given
5820 d = f + g
5821 c = a + e
5822 b = c - d
5823 q = b - r
5824 k = t - q
5826 we want to break up k = t - q, but we won't until we've transformed q
5827 = b - r, which won't be broken up until we transform b = c - d.
5829 En passant, clear the GIMPLE visited flag on every statement
5830 and set UIDs within each basic block. */
5832 static void
5833 break_up_subtract_bb (basic_block bb)
5835 gimple_stmt_iterator gsi;
5836 basic_block son;
5837 unsigned int uid = 1;
5839 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5841 gimple *stmt = gsi_stmt (gsi);
5842 gimple_set_visited (stmt, false);
5843 gimple_set_uid (stmt, uid++);
5845 if (!is_gimple_assign (stmt)
5846 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5847 continue;
5849 /* Look for simple gimple subtract operations. */
5850 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5852 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5853 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5854 continue;
5856 /* Check for a subtract used only in an addition. If this
5857 is the case, transform it into add of a negate for better
5858 reassociation. IE transform C = A-B into C = A + -B if C
5859 is only used in an addition. */
5860 if (should_break_up_subtract (stmt))
5861 break_up_subtract (stmt, &gsi);
5863 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5864 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5865 plus_negates.safe_push (gimple_assign_lhs (stmt));
5867 for (son = first_dom_son (CDI_DOMINATORS, bb);
5868 son;
5869 son = next_dom_son (CDI_DOMINATORS, son))
5870 break_up_subtract_bb (son);
5873 /* Used for repeated factor analysis. */
5874 struct repeat_factor
5876 /* An SSA name that occurs in a multiply chain. */
5877 tree factor;
5879 /* Cached rank of the factor. */
5880 unsigned rank;
5882 /* Number of occurrences of the factor in the chain. */
5883 HOST_WIDE_INT count;
5885 /* An SSA name representing the product of this factor and
5886 all factors appearing later in the repeated factor vector. */
5887 tree repr;
5891 static vec<repeat_factor> repeat_factor_vec;
5893 /* Used for sorting the repeat factor vector. Sort primarily by
5894 ascending occurrence count, secondarily by descending rank. */
5896 static int
5897 compare_repeat_factors (const void *x1, const void *x2)
5899 const repeat_factor *rf1 = (const repeat_factor *) x1;
5900 const repeat_factor *rf2 = (const repeat_factor *) x2;
5902 if (rf1->count != rf2->count)
5903 return rf1->count - rf2->count;
5905 return rf2->rank - rf1->rank;
5908 /* Look for repeated operands in OPS in the multiply tree rooted at
5909 STMT. Replace them with an optimal sequence of multiplies and powi
5910 builtin calls, and remove the used operands from OPS. Return an
5911 SSA name representing the value of the replacement sequence. */
5913 static tree
5914 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5916 unsigned i, j, vec_len;
5917 int ii;
5918 operand_entry *oe;
5919 repeat_factor *rf1, *rf2;
5920 repeat_factor rfnew;
5921 tree result = NULL_TREE;
5922 tree target_ssa, iter_result;
5923 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5924 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5925 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5926 gimple *mul_stmt, *pow_stmt;
5928 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5929 target. */
5930 if (!powi_fndecl)
5931 return NULL_TREE;
5933 /* Allocate the repeated factor vector. */
5934 repeat_factor_vec.create (10);
5936 /* Scan the OPS vector for all SSA names in the product and build
5937 up a vector of occurrence counts for each factor. */
5938 FOR_EACH_VEC_ELT (*ops, i, oe)
5940 if (TREE_CODE (oe->op) == SSA_NAME)
5942 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5944 if (rf1->factor == oe->op)
5946 rf1->count += oe->count;
5947 break;
5951 if (j >= repeat_factor_vec.length ())
5953 rfnew.factor = oe->op;
5954 rfnew.rank = oe->rank;
5955 rfnew.count = oe->count;
5956 rfnew.repr = NULL_TREE;
5957 repeat_factor_vec.safe_push (rfnew);
5962 /* Sort the repeated factor vector by (a) increasing occurrence count,
5963 and (b) decreasing rank. */
5964 repeat_factor_vec.qsort (compare_repeat_factors);
5966 /* It is generally best to combine as many base factors as possible
5967 into a product before applying __builtin_powi to the result.
5968 However, the sort order chosen for the repeated factor vector
5969 allows us to cache partial results for the product of the base
5970 factors for subsequent use. When we already have a cached partial
5971 result from a previous iteration, it is best to make use of it
5972 before looking for another __builtin_pow opportunity.
5974 As an example, consider x * x * y * y * y * z * z * z * z.
5975 We want to first compose the product x * y * z, raise it to the
5976 second power, then multiply this by y * z, and finally multiply
5977 by z. This can be done in 5 multiplies provided we cache y * z
5978 for use in both expressions:
5980 t1 = y * z
5981 t2 = t1 * x
5982 t3 = t2 * t2
5983 t4 = t1 * t3
5984 result = t4 * z
5986 If we instead ignored the cached y * z and first multiplied by
5987 the __builtin_pow opportunity z * z, we would get the inferior:
5989 t1 = y * z
5990 t2 = t1 * x
5991 t3 = t2 * t2
5992 t4 = z * z
5993 t5 = t3 * t4
5994 result = t5 * y */
5996 vec_len = repeat_factor_vec.length ();
5998 /* Repeatedly look for opportunities to create a builtin_powi call. */
5999 while (true)
6001 HOST_WIDE_INT power;
6003 /* First look for the largest cached product of factors from
6004 preceding iterations. If found, create a builtin_powi for
6005 it if the minimum occurrence count for its factors is at
6006 least 2, or just use this cached product as our next
6007 multiplicand if the minimum occurrence count is 1. */
6008 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6010 if (rf1->repr && rf1->count > 0)
6011 break;
6014 if (j < vec_len)
6016 power = rf1->count;
6018 if (power == 1)
6020 iter_result = rf1->repr;
6022 if (dump_file && (dump_flags & TDF_DETAILS))
6024 unsigned elt;
6025 repeat_factor *rf;
6026 fputs ("Multiplying by cached product ", dump_file);
6027 for (elt = j; elt < vec_len; elt++)
6029 rf = &repeat_factor_vec[elt];
6030 print_generic_expr (dump_file, rf->factor);
6031 if (elt < vec_len - 1)
6032 fputs (" * ", dump_file);
6034 fputs ("\n", dump_file);
6037 else
6039 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6040 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6041 build_int_cst (integer_type_node,
6042 power));
6043 gimple_call_set_lhs (pow_stmt, iter_result);
6044 gimple_set_location (pow_stmt, gimple_location (stmt));
6045 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6046 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6048 if (dump_file && (dump_flags & TDF_DETAILS))
6050 unsigned elt;
6051 repeat_factor *rf;
6052 fputs ("Building __builtin_pow call for cached product (",
6053 dump_file);
6054 for (elt = j; elt < vec_len; elt++)
6056 rf = &repeat_factor_vec[elt];
6057 print_generic_expr (dump_file, rf->factor);
6058 if (elt < vec_len - 1)
6059 fputs (" * ", dump_file);
6061 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6062 power);
6066 else
6068 /* Otherwise, find the first factor in the repeated factor
6069 vector whose occurrence count is at least 2. If no such
6070 factor exists, there are no builtin_powi opportunities
6071 remaining. */
6072 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6074 if (rf1->count >= 2)
6075 break;
6078 if (j >= vec_len)
6079 break;
6081 power = rf1->count;
6083 if (dump_file && (dump_flags & TDF_DETAILS))
6085 unsigned elt;
6086 repeat_factor *rf;
6087 fputs ("Building __builtin_pow call for (", dump_file);
6088 for (elt = j; elt < vec_len; elt++)
6090 rf = &repeat_factor_vec[elt];
6091 print_generic_expr (dump_file, rf->factor);
6092 if (elt < vec_len - 1)
6093 fputs (" * ", dump_file);
6095 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6098 reassociate_stats.pows_created++;
6100 /* Visit each element of the vector in reverse order (so that
6101 high-occurrence elements are visited first, and within the
6102 same occurrence count, lower-ranked elements are visited
6103 first). Form a linear product of all elements in this order
6104 whose occurrencce count is at least that of element J.
6105 Record the SSA name representing the product of each element
6106 with all subsequent elements in the vector. */
6107 if (j == vec_len - 1)
6108 rf1->repr = rf1->factor;
6109 else
6111 for (ii = vec_len - 2; ii >= (int)j; ii--)
6113 tree op1, op2;
6115 rf1 = &repeat_factor_vec[ii];
6116 rf2 = &repeat_factor_vec[ii + 1];
6118 /* Init the last factor's representative to be itself. */
6119 if (!rf2->repr)
6120 rf2->repr = rf2->factor;
6122 op1 = rf1->factor;
6123 op2 = rf2->repr;
6125 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6126 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6127 op1, op2);
6128 gimple_set_location (mul_stmt, gimple_location (stmt));
6129 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6130 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6131 rf1->repr = target_ssa;
6133 /* Don't reprocess the multiply we just introduced. */
6134 gimple_set_visited (mul_stmt, true);
6138 /* Form a call to __builtin_powi for the maximum product
6139 just formed, raised to the power obtained earlier. */
6140 rf1 = &repeat_factor_vec[j];
6141 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6142 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6143 build_int_cst (integer_type_node,
6144 power));
6145 gimple_call_set_lhs (pow_stmt, iter_result);
6146 gimple_set_location (pow_stmt, gimple_location (stmt));
6147 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6148 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6151 /* If we previously formed at least one other builtin_powi call,
6152 form the product of this one and those others. */
6153 if (result)
6155 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6156 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6157 result, iter_result);
6158 gimple_set_location (mul_stmt, gimple_location (stmt));
6159 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6160 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6161 gimple_set_visited (mul_stmt, true);
6162 result = new_result;
6164 else
6165 result = iter_result;
6167 /* Decrement the occurrence count of each element in the product
6168 by the count found above, and remove this many copies of each
6169 factor from OPS. */
6170 for (i = j; i < vec_len; i++)
6172 unsigned k = power;
6173 unsigned n;
6175 rf1 = &repeat_factor_vec[i];
6176 rf1->count -= power;
6178 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6180 if (oe->op == rf1->factor)
6182 if (oe->count <= k)
6184 ops->ordered_remove (n);
6185 k -= oe->count;
6187 if (k == 0)
6188 break;
6190 else
6192 oe->count -= k;
6193 break;
6200 /* At this point all elements in the repeated factor vector have a
6201 remaining occurrence count of 0 or 1, and those with a count of 1
6202 don't have cached representatives. Re-sort the ops vector and
6203 clean up. */
6204 ops->qsort (sort_by_operand_rank);
6205 repeat_factor_vec.release ();
6207 /* Return the final product computed herein. Note that there may
6208 still be some elements with single occurrence count left in OPS;
6209 those will be handled by the normal reassociation logic. */
6210 return result;
6213 /* Attempt to optimize
6214 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6215 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6217 static void
6218 attempt_builtin_copysign (vec<operand_entry *> *ops)
6220 operand_entry *oe;
6221 unsigned int i;
6222 unsigned int length = ops->length ();
6223 tree cst = ops->last ()->op;
6225 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6226 return;
6228 FOR_EACH_VEC_ELT (*ops, i, oe)
6230 if (TREE_CODE (oe->op) == SSA_NAME
6231 && has_single_use (oe->op))
6233 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6234 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6236 tree arg0, arg1;
6237 switch (gimple_call_combined_fn (old_call))
6239 CASE_CFN_COPYSIGN:
6240 CASE_CFN_COPYSIGN_FN:
6241 arg0 = gimple_call_arg (old_call, 0);
6242 arg1 = gimple_call_arg (old_call, 1);
6243 /* The first argument of copysign must be a constant,
6244 otherwise there's nothing to do. */
6245 if (TREE_CODE (arg0) == REAL_CST)
6247 tree type = TREE_TYPE (arg0);
6248 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6249 /* If we couldn't fold to a single constant, skip it.
6250 That happens e.g. for inexact multiplication when
6251 -frounding-math. */
6252 if (mul == NULL_TREE)
6253 break;
6254 /* Instead of adjusting OLD_CALL, let's build a new
6255 call to not leak the LHS and prevent keeping bogus
6256 debug statements. DCE will clean up the old call. */
6257 gcall *new_call;
6258 if (gimple_call_internal_p (old_call))
6259 new_call = gimple_build_call_internal
6260 (IFN_COPYSIGN, 2, mul, arg1);
6261 else
6262 new_call = gimple_build_call
6263 (gimple_call_fndecl (old_call), 2, mul, arg1);
6264 tree lhs = make_ssa_name (type);
6265 gimple_call_set_lhs (new_call, lhs);
6266 gimple_set_location (new_call,
6267 gimple_location (old_call));
6268 insert_stmt_after (new_call, old_call);
6269 /* We've used the constant, get rid of it. */
6270 ops->pop ();
6271 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6272 /* Handle the CST1 < 0 case by negating the result. */
6273 if (cst1_neg)
6275 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6276 gimple *negate_stmt
6277 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6278 insert_stmt_after (negate_stmt, new_call);
6279 oe->op = negrhs;
6281 else
6282 oe->op = lhs;
6283 if (dump_file && (dump_flags & TDF_DETAILS))
6285 fprintf (dump_file, "Optimizing copysign: ");
6286 print_generic_expr (dump_file, cst);
6287 fprintf (dump_file, " * COPYSIGN (");
6288 print_generic_expr (dump_file, arg0);
6289 fprintf (dump_file, ", ");
6290 print_generic_expr (dump_file, arg1);
6291 fprintf (dump_file, ") into %sCOPYSIGN (",
6292 cst1_neg ? "-" : "");
6293 print_generic_expr (dump_file, mul);
6294 fprintf (dump_file, ", ");
6295 print_generic_expr (dump_file, arg1);
6296 fprintf (dump_file, "\n");
6298 return;
6300 break;
6301 default:
6302 break;
6309 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6311 static void
6312 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6314 tree rhs1;
6316 if (dump_file && (dump_flags & TDF_DETAILS))
6318 fprintf (dump_file, "Transforming ");
6319 print_gimple_stmt (dump_file, stmt, 0);
6322 rhs1 = gimple_assign_rhs1 (stmt);
6323 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6324 update_stmt (stmt);
6325 remove_visited_stmt_chain (rhs1);
6327 if (dump_file && (dump_flags & TDF_DETAILS))
6329 fprintf (dump_file, " into ");
6330 print_gimple_stmt (dump_file, stmt, 0);
6334 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6336 static void
6337 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6338 tree rhs1, tree rhs2)
6340 if (dump_file && (dump_flags & TDF_DETAILS))
6342 fprintf (dump_file, "Transforming ");
6343 print_gimple_stmt (dump_file, stmt, 0);
6346 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6347 update_stmt (gsi_stmt (*gsi));
6348 remove_visited_stmt_chain (rhs1);
6350 if (dump_file && (dump_flags & TDF_DETAILS))
6352 fprintf (dump_file, " into ");
6353 print_gimple_stmt (dump_file, stmt, 0);
6357 /* Reassociate expressions in basic block BB and its post-dominator as
6358 children.
6360 Bubble up return status from maybe_optimize_range_tests. */
6362 static bool
6363 reassociate_bb (basic_block bb)
6365 gimple_stmt_iterator gsi;
6366 basic_block son;
6367 gimple *stmt = last_stmt (bb);
6368 bool cfg_cleanup_needed = false;
6370 if (stmt && !gimple_visited_p (stmt))
6371 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6373 bool do_prev = false;
6374 for (gsi = gsi_last_bb (bb);
6375 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6377 do_prev = true;
6378 stmt = gsi_stmt (gsi);
6380 if (is_gimple_assign (stmt)
6381 && !stmt_could_throw_p (cfun, stmt))
6383 tree lhs, rhs1, rhs2;
6384 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6386 /* If this was part of an already processed statement,
6387 we don't need to touch it again. */
6388 if (gimple_visited_p (stmt))
6390 /* This statement might have become dead because of previous
6391 reassociations. */
6392 if (has_zero_uses (gimple_get_lhs (stmt)))
6394 reassoc_remove_stmt (&gsi);
6395 release_defs (stmt);
6396 /* We might end up removing the last stmt above which
6397 places the iterator to the end of the sequence.
6398 Reset it to the last stmt in this case and make sure
6399 we don't do gsi_prev in that case. */
6400 if (gsi_end_p (gsi))
6402 gsi = gsi_last_bb (bb);
6403 do_prev = false;
6406 continue;
6409 /* If this is not a gimple binary expression, there is
6410 nothing for us to do with it. */
6411 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6412 continue;
6414 lhs = gimple_assign_lhs (stmt);
6415 rhs1 = gimple_assign_rhs1 (stmt);
6416 rhs2 = gimple_assign_rhs2 (stmt);
6418 /* For non-bit or min/max operations we can't associate
6419 all types. Verify that here. */
6420 if (rhs_code != BIT_IOR_EXPR
6421 && rhs_code != BIT_AND_EXPR
6422 && rhs_code != BIT_XOR_EXPR
6423 && rhs_code != MIN_EXPR
6424 && rhs_code != MAX_EXPR
6425 && (!can_reassociate_p (lhs)
6426 || !can_reassociate_p (rhs1)
6427 || !can_reassociate_p (rhs2)))
6428 continue;
6430 if (associative_tree_code (rhs_code))
6432 auto_vec<operand_entry *> ops;
6433 tree powi_result = NULL_TREE;
6434 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6436 /* There may be no immediate uses left by the time we
6437 get here because we may have eliminated them all. */
6438 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6439 continue;
6441 gimple_set_visited (stmt, true);
6442 linearize_expr_tree (&ops, stmt, true, true);
6443 ops.qsort (sort_by_operand_rank);
6444 int orig_len = ops.length ();
6445 optimize_ops_list (rhs_code, &ops);
6446 if (undistribute_ops_list (rhs_code, &ops,
6447 loop_containing_stmt (stmt)))
6449 ops.qsort (sort_by_operand_rank);
6450 optimize_ops_list (rhs_code, &ops);
6452 if (undistribute_bitref_for_vector (rhs_code, &ops,
6453 loop_containing_stmt (stmt)))
6455 ops.qsort (sort_by_operand_rank);
6456 optimize_ops_list (rhs_code, &ops);
6458 if (rhs_code == PLUS_EXPR
6459 && transform_add_to_multiply (&ops))
6460 ops.qsort (sort_by_operand_rank);
6462 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6464 if (is_vector)
6465 optimize_vec_cond_expr (rhs_code, &ops);
6466 else
6467 optimize_range_tests (rhs_code, &ops, NULL);
6470 if (rhs_code == MULT_EXPR && !is_vector)
6472 attempt_builtin_copysign (&ops);
6474 if (reassoc_insert_powi_p
6475 && flag_unsafe_math_optimizations)
6476 powi_result = attempt_builtin_powi (stmt, &ops);
6479 operand_entry *last;
6480 bool negate_result = false;
6481 if (ops.length () > 1
6482 && rhs_code == MULT_EXPR)
6484 last = ops.last ();
6485 if ((integer_minus_onep (last->op)
6486 || real_minus_onep (last->op))
6487 && !HONOR_SNANS (TREE_TYPE (lhs))
6488 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6489 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6491 ops.pop ();
6492 negate_result = true;
6496 tree new_lhs = lhs;
6497 /* If the operand vector is now empty, all operands were
6498 consumed by the __builtin_powi optimization. */
6499 if (ops.length () == 0)
6500 transform_stmt_to_copy (&gsi, stmt, powi_result);
6501 else if (ops.length () == 1)
6503 tree last_op = ops.last ()->op;
6505 /* If the stmt that defines operand has to be inserted, insert it
6506 before the use. */
6507 if (ops.last ()->stmt_to_insert)
6508 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6509 if (powi_result)
6510 transform_stmt_to_multiply (&gsi, stmt, last_op,
6511 powi_result);
6512 else
6513 transform_stmt_to_copy (&gsi, stmt, last_op);
6515 else
6517 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6518 int ops_num = ops.length ();
6519 int width;
6521 /* For binary bit operations, if there are at least 3
6522 operands and the last operand in OPS is a constant,
6523 move it to the front. This helps ensure that we generate
6524 (X & Y) & C rather than (X & C) & Y. The former will
6525 often match a canonical bit test when we get to RTL. */
6526 if (ops.length () > 2
6527 && (rhs_code == BIT_AND_EXPR
6528 || rhs_code == BIT_IOR_EXPR
6529 || rhs_code == BIT_XOR_EXPR)
6530 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6531 std::swap (*ops[0], *ops[ops_num - 1]);
6533 /* Only rewrite the expression tree to parallel in the
6534 last reassoc pass to avoid useless work back-and-forth
6535 with initial linearization. */
6536 if (!reassoc_insert_powi_p
6537 && ops.length () > 3
6538 && (width = get_reassociation_width (ops_num, rhs_code,
6539 mode)) > 1)
6541 if (dump_file && (dump_flags & TDF_DETAILS))
6542 fprintf (dump_file,
6543 "Width = %d was chosen for reassociation\n",
6544 width);
6545 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6546 width, ops);
6548 else
6550 /* When there are three operands left, we want
6551 to make sure the ones that get the double
6552 binary op are chosen wisely. */
6553 int len = ops.length ();
6554 if (len >= 3)
6555 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6557 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6558 powi_result != NULL
6559 || negate_result,
6560 len != orig_len);
6563 /* If we combined some repeated factors into a
6564 __builtin_powi call, multiply that result by the
6565 reassociated operands. */
6566 if (powi_result)
6568 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6569 tree type = TREE_TYPE (lhs);
6570 tree target_ssa = make_temp_ssa_name (type, NULL,
6571 "reassocpow");
6572 gimple_set_lhs (lhs_stmt, target_ssa);
6573 update_stmt (lhs_stmt);
6574 if (lhs != new_lhs)
6576 target_ssa = new_lhs;
6577 new_lhs = lhs;
6579 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6580 powi_result, target_ssa);
6581 gimple_set_location (mul_stmt, gimple_location (stmt));
6582 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6583 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6587 if (negate_result)
6589 stmt = SSA_NAME_DEF_STMT (lhs);
6590 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6591 gimple_set_lhs (stmt, tmp);
6592 if (lhs != new_lhs)
6593 tmp = new_lhs;
6594 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6595 tmp);
6596 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6597 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6598 update_stmt (stmt);
6603 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6604 son;
6605 son = next_dom_son (CDI_POST_DOMINATORS, son))
6606 cfg_cleanup_needed |= reassociate_bb (son);
6608 return cfg_cleanup_needed;
6611 /* Add jumps around shifts for range tests turned into bit tests.
6612 For each SSA_NAME VAR we have code like:
6613 VAR = ...; // final stmt of range comparison
6614 // bit test here...;
6615 OTHERVAR = ...; // final stmt of the bit test sequence
6616 RES = VAR | OTHERVAR;
6617 Turn the above into:
6618 VAR = ...;
6619 if (VAR != 0)
6620 goto <l3>;
6621 else
6622 goto <l2>;
6623 <l2>:
6624 // bit test here...;
6625 OTHERVAR = ...;
6626 <l3>:
6627 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6629 static void
6630 branch_fixup (void)
6632 tree var;
6633 unsigned int i;
6635 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6637 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6638 gimple *use_stmt;
6639 use_operand_p use;
6640 bool ok = single_imm_use (var, &use, &use_stmt);
6641 gcc_assert (ok
6642 && is_gimple_assign (use_stmt)
6643 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6644 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6646 basic_block cond_bb = gimple_bb (def_stmt);
6647 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6648 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6650 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6651 gimple *g = gimple_build_cond (NE_EXPR, var,
6652 build_zero_cst (TREE_TYPE (var)),
6653 NULL_TREE, NULL_TREE);
6654 location_t loc = gimple_location (use_stmt);
6655 gimple_set_location (g, loc);
6656 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6658 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6659 etrue->probability = profile_probability::even ();
6660 edge efalse = find_edge (cond_bb, then_bb);
6661 efalse->flags = EDGE_FALSE_VALUE;
6662 efalse->probability -= etrue->probability;
6663 then_bb->count -= etrue->count ();
6665 tree othervar = NULL_TREE;
6666 if (gimple_assign_rhs1 (use_stmt) == var)
6667 othervar = gimple_assign_rhs2 (use_stmt);
6668 else if (gimple_assign_rhs2 (use_stmt) == var)
6669 othervar = gimple_assign_rhs1 (use_stmt);
6670 else
6671 gcc_unreachable ();
6672 tree lhs = gimple_assign_lhs (use_stmt);
6673 gphi *phi = create_phi_node (lhs, merge_bb);
6674 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6675 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6676 gsi = gsi_for_stmt (use_stmt);
6677 gsi_remove (&gsi, true);
6679 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6680 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6682 reassoc_branch_fixups.release ();
6685 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6686 void debug_ops_vector (vec<operand_entry *> ops);
6688 /* Dump the operand entry vector OPS to FILE. */
6690 void
6691 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6693 operand_entry *oe;
6694 unsigned int i;
6696 FOR_EACH_VEC_ELT (ops, i, oe)
6698 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6699 print_generic_expr (file, oe->op);
6700 fprintf (file, "\n");
6704 /* Dump the operand entry vector OPS to STDERR. */
6706 DEBUG_FUNCTION void
6707 debug_ops_vector (vec<operand_entry *> ops)
6709 dump_ops_vector (stderr, ops);
6712 /* Bubble up return status from reassociate_bb. */
6714 static bool
6715 do_reassoc (void)
6717 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6718 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6721 /* Initialize the reassociation pass. */
6723 static void
6724 init_reassoc (void)
6726 int i;
6727 long rank = 2;
6728 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6730 /* Find the loops, so that we can prevent moving calculations in
6731 them. */
6732 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6734 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6736 next_operand_entry_id = 0;
6738 /* Reverse RPO (Reverse Post Order) will give us something where
6739 deeper loops come later. */
6740 pre_and_rev_post_order_compute (NULL, bbs, false);
6741 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6742 operand_rank = new hash_map<tree, long>;
6744 /* Give each default definition a distinct rank. This includes
6745 parameters and the static chain. Walk backwards over all
6746 SSA names so that we get proper rank ordering according
6747 to tree_swap_operands_p. */
6748 for (i = num_ssa_names - 1; i > 0; --i)
6750 tree name = ssa_name (i);
6751 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6752 insert_operand_rank (name, ++rank);
6755 /* Set up rank for each BB */
6756 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6757 bb_rank[bbs[i]] = ++rank << 16;
6759 free (bbs);
6760 calculate_dominance_info (CDI_POST_DOMINATORS);
6761 plus_negates = vNULL;
6764 /* Cleanup after the reassociation pass, and print stats if
6765 requested. */
6767 static void
6768 fini_reassoc (void)
6770 statistics_counter_event (cfun, "Linearized",
6771 reassociate_stats.linearized);
6772 statistics_counter_event (cfun, "Constants eliminated",
6773 reassociate_stats.constants_eliminated);
6774 statistics_counter_event (cfun, "Ops eliminated",
6775 reassociate_stats.ops_eliminated);
6776 statistics_counter_event (cfun, "Statements rewritten",
6777 reassociate_stats.rewritten);
6778 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6779 reassociate_stats.pows_encountered);
6780 statistics_counter_event (cfun, "Built-in powi calls created",
6781 reassociate_stats.pows_created);
6783 delete operand_rank;
6784 operand_entry_pool.release ();
6785 free (bb_rank);
6786 plus_negates.release ();
6787 free_dominance_info (CDI_POST_DOMINATORS);
6788 loop_optimizer_finalize ();
6791 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6792 insertion of __builtin_powi calls.
6794 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6795 optimization of a gimple conditional. Otherwise returns zero. */
6797 static unsigned int
6798 execute_reassoc (bool insert_powi_p)
6800 reassoc_insert_powi_p = insert_powi_p;
6802 init_reassoc ();
6804 bool cfg_cleanup_needed;
6805 cfg_cleanup_needed = do_reassoc ();
6806 repropagate_negates ();
6807 branch_fixup ();
6809 fini_reassoc ();
6810 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6813 namespace {
6815 const pass_data pass_data_reassoc =
6817 GIMPLE_PASS, /* type */
6818 "reassoc", /* name */
6819 OPTGROUP_NONE, /* optinfo_flags */
6820 TV_TREE_REASSOC, /* tv_id */
6821 ( PROP_cfg | PROP_ssa ), /* properties_required */
6822 0, /* properties_provided */
6823 0, /* properties_destroyed */
6824 0, /* todo_flags_start */
6825 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6828 class pass_reassoc : public gimple_opt_pass
6830 public:
6831 pass_reassoc (gcc::context *ctxt)
6832 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6835 /* opt_pass methods: */
6836 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6837 void set_pass_param (unsigned int n, bool param)
6839 gcc_assert (n == 0);
6840 insert_powi_p = param;
6842 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6843 virtual unsigned int execute (function *)
6844 { return execute_reassoc (insert_powi_p); }
6846 private:
6847 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6848 point 3a in the pass header comment. */
6849 bool insert_powi_p;
6850 }; // class pass_reassoc
6852 } // anon namespace
6854 gimple_opt_pass *
6855 make_pass_reassoc (gcc::context *ctxt)
6857 return new pass_reassoc (ctxt);