gcc/tree-ssa-reassoc.c (blob 359ccaee68d6c66c91d7920d7add3afc2bac4e88)
1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
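/* Illustrative sketch (added for exposition; not part of the pass and not
   generated by it): for integer operands, a chain such as

     t1 = x - y;
     t2 = t1 + y;

   is handled as follows.  Step 1 breaks the subtract into t1 = x + (-y);
   step 2 linearizes t2 into the operand list {x, -y, y}; step 3 cancels
   the -y/y pair and the resulting + 0 is dropped; the rewrite in step 4
   then leaves simply t2 = x.  */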
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 unsigned int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted - the basic block is
218 split right after that SSA_NAME's definition statement and before
219 its only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 class loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
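/* Worked example of the rule above (illustrative only): if rank (b) == 3
   and rank (c) == 1, then for a = b + c the loop over the SSA uses
   computes MAX (3, 1) == 3 and a is recorded with rank 4.  Constants
   contribute rank 0, so a = b + 5 likewise gets rank (b) + 1.  */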
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 /* Sort -1.0 and 1.0 constants last: while in some cases
487 const_binop can't optimize some inexact operations, multiplication
488 by -1.0 or 1.0 can always be merged with others. */
489 if (real_onep (t) || real_minus_onep (t))
490 return FLOAT_ONE_CONST_TYPE;
491 return FLOAT_CONST_TYPE;
493 else
494 return OTHER_CONST_TYPE;
497 /* qsort comparison function to sort operand entries PA and PB by rank
498 so that the sorted array is ordered by rank in decreasing order. */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
502 const operand_entry *oea = *(const operand_entry *const *)pa;
503 const operand_entry *oeb = *(const operand_entry *const *)pb;
505 if (oeb->rank != oea->rank)
506 return oeb->rank > oea->rank ? 1 : -1;
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied/whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
511 if (oea->rank == 0)
513 if (constant_type (oeb->op) != constant_type (oea->op))
514 return constant_type (oea->op) - constant_type (oeb->op);
515 else
516 /* To make sorting result stable, we use unique IDs to determine
517 order. */
518 return oeb->id > oea->id ? 1 : -1;
521 if (TREE_CODE (oea->op) != SSA_NAME)
523 if (TREE_CODE (oeb->op) != SSA_NAME)
524 return oeb->id > oea->id ? 1 : -1;
525 else
526 return 1;
528 else if (TREE_CODE (oeb->op) != SSA_NAME)
529 return -1;
531 /* Lastly, make sure the versions that are the same go next to each
532 other. */
533 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
536 versions of removed SSA_NAMEs, so if possible prefer to sort
537 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 See PR60418. */
539 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541 basic_block bba = gimple_bb (stmta);
542 basic_block bbb = gimple_bb (stmtb);
543 if (bbb != bba)
545 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 but the other might not. */
547 if (!bba)
548 return 1;
549 if (!bbb)
550 return -1;
551 /* If neither is, compare bb_rank. */
552 if (bb_rank[bbb->index] != bb_rank[bba->index])
553 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
556 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558 if (da != db)
559 return da ? 1 : -1;
561 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
564 return oeb->id > oea->id ? 1 : -1;
567 /* Add an operand entry to *OPS for the tree operand OP. */
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 operand_entry *oe = operand_entry_pool.allocate ();
574 oe->op = op;
575 oe->rank = get_rank (op);
576 oe->id = next_operand_entry_id++;
577 oe->count = 1;
578 oe->stmt_to_insert = stmt_to_insert;
579 ops->safe_push (oe);
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583 count REPEAT. */
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
587 HOST_WIDE_INT repeat)
589 operand_entry *oe = operand_entry_pool.allocate ();
591 oe->op = op;
592 oe->rank = get_rank (op);
593 oe->id = next_operand_entry_id++;
594 oe->count = repeat;
595 oe->stmt_to_insert = NULL;
596 ops->safe_push (oe);
598 reassociate_stats.pows_encountered++;
601 /* Return true if STMT is a reassociable operation containing a binary
602 operation with tree code CODE, and is inside LOOP. */
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
607 basic_block bb = gimple_bb (stmt);
609 if (gimple_bb (stmt) == NULL)
610 return false;
612 if (!flow_bb_inside_loop_p (loop, bb))
613 return false;
615 if (is_gimple_assign (stmt)
616 && gimple_assign_rhs_code (stmt) == code
617 && has_single_use (gimple_assign_lhs (stmt)))
619 tree rhs1 = gimple_assign_rhs1 (stmt);
620 tree rhs2 = gimple_assign_rhs2 (stmt);
621 if (TREE_CODE (rhs1) == SSA_NAME
622 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 return false;
624 if (rhs2
625 && TREE_CODE (rhs2) == SSA_NAME
626 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 return false;
628 return true;
631 return false;
635 /* Return true if STMT is a nop-conversion. */
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
640 if (gassign *ass = dyn_cast <gassign *> (stmt))
642 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 TREE_TYPE (gimple_assign_rhs1 (ass))))
645 return true;
647 return false;
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651 operand of that operation. Otherwise, return NULL. */
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
656 gimple *stmt = SSA_NAME_DEF_STMT (name);
658 /* Look through nop conversions (sign changes). */
659 if (gimple_nop_conversion_p (stmt)
660 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 if (!is_gimple_assign (stmt))
664 return NULL_TREE;
666 if (gimple_assign_rhs_code (stmt) == opcode)
667 return gimple_assign_rhs1 (stmt);
668 return NULL_TREE;
671 /* Return true if OP1 and OP2 have the same value when cast to either type. */
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
676 if (op1 == op2)
677 return true;
679 tree orig_op1 = op1;
680 if (TREE_CODE (op1) == SSA_NAME)
682 gimple *stmt = SSA_NAME_DEF_STMT (op1);
683 if (gimple_nop_conversion_p (stmt))
685 op1 = gimple_assign_rhs1 (stmt);
686 if (op1 == op2)
687 return true;
691 if (TREE_CODE (op2) == SSA_NAME)
693 gimple *stmt = SSA_NAME_DEF_STMT (op2);
694 if (gimple_nop_conversion_p (stmt))
696 op2 = gimple_assign_rhs1 (stmt);
697 if (op1 == op2
698 || orig_op1 == op2)
699 return true;
703 return false;
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708 eliminate through equivalences, do so, remove them from OPS, and
709 return true. Otherwise, return false. */
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 vec<operand_entry *> *ops,
714 bool *all_done,
715 unsigned int i,
716 operand_entry *curr,
717 operand_entry *last)
720 /* If we have two of the same op, and the opcode is &, |, min, or max,
721 we can eliminate one of them.
722 If we have two of the same op, and the opcode is ^, we can
723 eliminate both of them. */
725 if (last && last->op == curr->op)
727 switch (opcode)
729 case MAX_EXPR:
730 case MIN_EXPR:
731 case BIT_IOR_EXPR:
732 case BIT_AND_EXPR:
733 if (dump_file && (dump_flags & TDF_DETAILS))
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, curr->op);
737 fprintf (dump_file, " [&|minmax] ");
738 print_generic_expr (dump_file, last->op);
739 fprintf (dump_file, " -> ");
740 print_generic_stmt (dump_file, last->op);
743 ops->ordered_remove (i);
744 reassociate_stats.ops_eliminated ++;
746 return true;
748 case BIT_XOR_EXPR:
749 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "Equivalence: ");
752 print_generic_expr (dump_file, curr->op);
753 fprintf (dump_file, " ^ ");
754 print_generic_expr (dump_file, last->op);
755 fprintf (dump_file, " -> nothing\n");
758 reassociate_stats.ops_eliminated += 2;
760 if (ops->length () == 2)
762 ops->truncate (0);
763 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 *all_done = true;
766 else
768 ops->ordered_remove (i-1);
769 ops->ordered_remove (i-1);
772 return true;
774 default:
775 break;
778 return false;
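/* Example of the eliminations above (a sketch, not actual dump output):
   with the sorted operand list {x, x, y}, BIT_AND_EXPR, BIT_IOR_EXPR,
   MIN_EXPR or MAX_EXPR drop one of the x's, leaving {x, y}, while
   BIT_XOR_EXPR drops both, leaving {y}.  Had the list been just {x, x},
   the xor case would collapse it to a single zero constant.  */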
781 static vec<tree> plus_negates;
783 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
784 expression, look in OPS for a corresponding positive operation to cancel
785 it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
787 return false. */
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 vec<operand_entry *> *ops,
792 unsigned int currindex,
793 operand_entry *curr)
795 tree negateop;
796 tree notop;
797 unsigned int i;
798 operand_entry *oe;
800 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
803 negateop = get_unary_op (curr->op, NEGATE_EXPR);
804 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805 if (negateop == NULL_TREE && notop == NULL_TREE)
806 return false;
808 /* Any non-negated version will have a rank that is one less than
809 the current rank. So once we hit those ranks, if we don't find
810 one, we can stop. */
812 for (i = currindex + 1;
813 ops->iterate (i, &oe)
814 && oe->rank >= curr->rank - 1 ;
815 i++)
817 if (negateop
818 && ops_equal_values_p (oe->op, negateop))
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, negateop);
824 fprintf (dump_file, " + -");
825 print_generic_expr (dump_file, oe->op);
826 fprintf (dump_file, " -> 0\n");
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
834 return true;
836 else if (notop
837 && ops_equal_values_p (oe->op, notop))
839 tree op_type = TREE_TYPE (oe->op);
841 if (dump_file && (dump_flags & TDF_DETAILS))
843 fprintf (dump_file, "Equivalence: ");
844 print_generic_expr (dump_file, notop);
845 fprintf (dump_file, " + ~");
846 print_generic_expr (dump_file, oe->op);
847 fprintf (dump_file, " -> -1\n");
850 ops->ordered_remove (i);
851 add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 ops->ordered_remove (currindex);
853 reassociate_stats.ops_eliminated ++;
855 return true;
859 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860 save it for later inspection in repropagate_negates(). */
861 if (negateop != NULL_TREE
862 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863 plus_negates.safe_push (curr->op);
865 return false;
868 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
869 bitwise not expression, look in OPS for a corresponding operand to
870 cancel it out. If we find one, the whole expression is known, so
871 replace OPS with the single constant 0 (for &) or -1 (for |) and
872 return true. Otherwise, return false. */
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 vec<operand_entry *> *ops,
877 unsigned int currindex,
878 operand_entry *curr)
880 tree notop;
881 unsigned int i;
882 operand_entry *oe;
884 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885 || TREE_CODE (curr->op) != SSA_NAME)
886 return false;
888 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889 if (notop == NULL_TREE)
890 return false;
892 /* Any non-not version will have a rank that is one less than
893 the current rank. So once we hit those ranks, if we don't find
894 one, we can stop. */
896 for (i = currindex + 1;
897 ops->iterate (i, &oe)
898 && oe->rank >= curr->rank - 1;
899 i++)
901 if (oe->op == notop)
903 if (dump_file && (dump_flags & TDF_DETAILS))
905 fprintf (dump_file, "Equivalence: ");
906 print_generic_expr (dump_file, notop);
907 if (opcode == BIT_AND_EXPR)
908 fprintf (dump_file, " & ~");
909 else if (opcode == BIT_IOR_EXPR)
910 fprintf (dump_file, " | ~");
911 print_generic_expr (dump_file, oe->op);
912 if (opcode == BIT_AND_EXPR)
913 fprintf (dump_file, " -> 0\n");
914 else if (opcode == BIT_IOR_EXPR)
915 fprintf (dump_file, " -> -1\n");
918 if (opcode == BIT_AND_EXPR)
919 oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 else if (opcode == BIT_IOR_EXPR)
921 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 reassociate_stats.ops_eliminated += ops->length () - 1;
924 ops->truncate (0);
925 ops->quick_push (oe);
926 return true;
930 return false;
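/* Example (illustrative): for x & ~x & y the whole operand list is
   replaced by the single constant 0, and for x | ~x | y by the all-ones
   constant -1, since no other operand can change the result.  */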
933 /* Use constant value that may be present in OPS to try to eliminate
934 operands. Note that this function is only really used when we've
935 eliminated ops for other reasons, or merged constants. Across
936 single statements, fold already does all of this, plus more. There
937 is little point in duplicating logic, so I've only included the
938 identities that I could ever construct testcases to trigger. */
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 vec<operand_entry *> *ops)
944 operand_entry *oelast = ops->last ();
945 tree type = TREE_TYPE (oelast->op);
947 if (oelast->rank == 0
948 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950 switch (opcode)
952 case BIT_AND_EXPR:
953 if (integer_zerop (oelast->op))
955 if (ops->length () != 1)
957 if (dump_file && (dump_flags & TDF_DETAILS))
958 fprintf (dump_file, "Found & 0, removing all other ops\n");
960 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (0);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_all_onesp (oelast->op))
969 if (ops->length () != 1)
971 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Found & -1, removing\n");
973 ops->pop ();
974 reassociate_stats.ops_eliminated++;
977 break;
978 case BIT_IOR_EXPR:
979 if (integer_all_onesp (oelast->op))
981 if (ops->length () != 1)
983 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Found | -1, removing all other ops\n");
986 reassociate_stats.ops_eliminated += ops->length () - 1;
988 ops->truncate (0);
989 ops->quick_push (oelast);
990 return;
993 else if (integer_zerop (oelast->op))
995 if (ops->length () != 1)
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Found | 0, removing\n");
999 ops->pop ();
1000 reassociate_stats.ops_eliminated++;
1003 break;
1004 case MULT_EXPR:
1005 if (integer_zerop (oelast->op)
1006 || (FLOAT_TYPE_P (type)
1007 && !HONOR_NANS (type)
1008 && !HONOR_SIGNED_ZEROS (type)
1009 && real_zerop (oelast->op)))
1011 if (ops->length () != 1)
1013 if (dump_file && (dump_flags & TDF_DETAILS))
1014 fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 reassociate_stats.ops_eliminated += ops->length () - 1;
1017 ops->truncate (0);
1018 ops->quick_push (oelast);
1019 return;
1022 else if (integer_onep (oelast->op)
1023 || (FLOAT_TYPE_P (type)
1024 && !HONOR_SNANS (type)
1025 && real_onep (oelast->op)))
1027 if (ops->length () != 1)
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found * 1, removing\n");
1031 ops->pop ();
1032 reassociate_stats.ops_eliminated++;
1033 return;
1036 break;
1037 case BIT_XOR_EXPR:
1038 case PLUS_EXPR:
1039 case MINUS_EXPR:
1040 if (integer_zerop (oelast->op)
1041 || (FLOAT_TYPE_P (type)
1042 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 && fold_real_zero_addition_p (type, oelast->op,
1044 opcode == MINUS_EXPR)))
1046 if (ops->length () != 1)
1048 if (dump_file && (dump_flags & TDF_DETAILS))
1049 fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 ops->pop ();
1051 reassociate_stats.ops_eliminated++;
1052 return;
1055 break;
1056 default:
1057 break;
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 bool, bool);
1066 /* Structure for tracking and counting operands. */
1067 struct oecount {
1068 unsigned int cnt;
1069 unsigned int id;
1070 enum tree_code oecode;
1071 tree op;
1075 /* The heap for the oecount hashtable and the sorted list of operands. */
1076 static vec<oecount> cvec;
1079 /* Oecount hashtable helpers. */
1081 struct oecount_hasher : int_hash <int, 0, 1>
1083 static inline hashval_t hash (int);
1084 static inline bool equal (int, int);
1087 /* Hash function for oecount. */
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1092 const oecount *c = &cvec[p - 42];
1093 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1096 /* Comparison function for oecount. */
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1101 const oecount *c1 = &cvec[p1 - 42];
1102 const oecount *c2 = &cvec[p2 - 42];
1103 return c1->oecode == c2->oecode && c1->op == c2->op;
1106 /* Comparison function for qsort sorting oecount elements by count. */
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1111 const oecount *c1 = (const oecount *)p1;
1112 const oecount *c2 = (const oecount *)p2;
1113 if (c1->cnt != c2->cnt)
1114 return c1->cnt > c2->cnt ? 1 : -1;
1115 else
1116 /* If counts are identical, use unique IDs to stabilize qsort. */
1117 return c1->id > c2->id ? 1 : -1;
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121 to some exponent. */
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1126 if (!is_gimple_call (stmt))
1127 return false;
1129 switch (gimple_call_combined_fn (stmt))
1131 CASE_CFN_POW:
1132 CASE_CFN_POWI:
1133 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 default:
1136 return false;
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141 in place and return the result. Assumes that stmt_is_power_of_op
1142 was previously called for STMT and returned TRUE. */
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1147 REAL_VALUE_TYPE c, cint;
1148 HOST_WIDE_INT power;
1149 tree arg1;
1151 switch (gimple_call_combined_fn (stmt))
1153 CASE_CFN_POW:
1154 arg1 = gimple_call_arg (stmt, 1);
1155 c = TREE_REAL_CST (arg1);
1156 power = real_to_integer (&c) - 1;
1157 real_from_integer (&cint, VOIDmode, power, SIGNED);
1158 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159 return power;
1161 CASE_CFN_POWI:
1162 arg1 = gimple_call_arg (stmt, 1);
1163 power = TREE_INT_CST_LOW (arg1) - 1;
1164 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165 return power;
1167 default:
1168 gcc_unreachable ();
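/* For instance (illustrative), calling decrement_power on
   __builtin_powi (x, 3) rewrites the call in place to
   __builtin_powi (x, 2) and returns 2.  */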
1172 /* Replace the SSA name defined by STMT with a new SSA name and update
1173 all its uses (debug uses via a new debug expr). Return the new SSA name. */
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 gimple *use_stmt;
1179 use_operand_p use;
1180 imm_use_iterator iter;
1181 tree new_lhs, new_debug_lhs = NULL_TREE;
1182 tree lhs = gimple_get_lhs (stmt);
1184 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185 gimple_set_lhs (stmt, new_lhs);
1187 /* Also need to update GIMPLE_DEBUGs. */
1188 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190 tree repl = new_lhs;
1191 if (is_gimple_debug (use_stmt))
1193 if (new_debug_lhs == NULL_TREE)
1195 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 gdebug *def_temp
1197 = gimple_build_debug_bind (new_debug_lhs,
1198 build2 (opcode, TREE_TYPE (lhs),
1199 new_lhs, op),
1200 stmt);
1201 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1204 gimple_set_uid (def_temp, gimple_uid (stmt));
1205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 repl = new_debug_lhs;
1210 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 SET_USE (use, repl);
1212 update_stmt (use_stmt);
1214 return new_lhs;
1217 /* Replace the SSA names defined by the stmts in STMTS_TO_FIX with new
1218 SSA names and update their uses. Also do this for the stmt that
1219 defines *DEF if *DEF is not OP. */
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 vec<gimple *> &stmts_to_fix)
1225 unsigned i;
1226 gimple *stmt;
1228 if (*def != op
1229 && TREE_CODE (*def) == SSA_NAME
1230 && (stmt = SSA_NAME_DEF_STMT (*def))
1231 && gimple_code (stmt) != GIMPLE_NOP)
1232 *def = make_new_ssa_for_def (stmt, opcode, op);
1234 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235 make_new_ssa_for_def (stmt, opcode, op);
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1240 replace *DEF with OP as well. */
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 tree lhs;
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 gimple_stmt_iterator gsi;
1250 if (is_gimple_call (stmt))
1251 lhs = gimple_call_lhs (stmt);
1252 else
1253 lhs = gimple_assign_lhs (stmt);
1255 gcc_assert (has_single_use (lhs));
1256 single_imm_use (lhs, &use, &use_stmt);
1257 if (lhs == *def)
1258 *def = op;
1259 SET_USE (use, op);
1260 if (TREE_CODE (op) != SSA_NAME)
1261 update_stmt (use_stmt);
1262 gsi = gsi_for_stmt (stmt);
1263 unlink_stmt_vdef (stmt);
1264 reassoc_remove_stmt (&gsi);
1265 release_defs (stmt);
1268 /* Walk the linear chain with result *DEF searching for an operation
1269 with operand OP and code OPCODE, and remove it from the chain. *DEF
1270 is updated if there is only one operand but no operation left. */
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 tree orig_def = *def;
1276 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277 /* PR72835 - Record the stmt chain that has to be updated such that
1278 we don't use the same LHS when the values computed are different. */
1279 auto_vec<gimple *, 64> stmts_to_fix;
1283 tree name;
1285 if (opcode == MULT_EXPR)
1287 if (stmt_is_power_of_op (stmt, op))
1289 if (decrement_power (stmt) == 1)
1291 if (stmts_to_fix.length () > 0)
1292 stmts_to_fix.pop ();
1293 propagate_op_to_single_use (op, stmt, def);
1295 break;
1297 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 if (gimple_assign_rhs1 (stmt) == op)
1301 tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 if (stmts_to_fix.length () > 0)
1303 stmts_to_fix.pop ();
1304 propagate_op_to_single_use (cst, stmt, def);
1305 break;
1307 else if (integer_minus_onep (op)
1308 || real_minus_onep (op))
1310 gimple_assign_set_rhs_code
1311 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 break;
1317 name = gimple_assign_rhs1 (stmt);
1319 /* If this is the operation we look for and one of the operands
1320 is ours simply propagate the other operand into the stmts
1321 single use. */
1322 if (gimple_assign_rhs_code (stmt) == opcode
1323 && (name == op
1324 || gimple_assign_rhs2 (stmt) == op))
1326 if (name == op)
1327 name = gimple_assign_rhs2 (stmt);
1328 if (stmts_to_fix.length () > 0)
1329 stmts_to_fix.pop ();
1330 propagate_op_to_single_use (name, stmt, def);
1331 break;
1334 /* We might have a multiply of two __builtin_pow* calls, and
1335 the operand might be hiding in the rightmost one. Likewise
1336 this can happen for a negate. */
1337 if (opcode == MULT_EXPR
1338 && gimple_assign_rhs_code (stmt) == opcode
1339 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 && has_single_use (gimple_assign_rhs2 (stmt)))
1342 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 if (stmt_is_power_of_op (stmt2, op))
1345 if (decrement_power (stmt2) == 1)
1346 propagate_op_to_single_use (op, stmt2, def);
1347 else
1348 stmts_to_fix.safe_push (stmt2);
1349 break;
1351 else if (is_gimple_assign (stmt2)
1352 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 if (gimple_assign_rhs1 (stmt2) == op)
1356 tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 propagate_op_to_single_use (cst, stmt2, def);
1358 break;
1360 else if (integer_minus_onep (op)
1361 || real_minus_onep (op))
1363 stmts_to_fix.safe_push (stmt2);
1364 gimple_assign_set_rhs_code
1365 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 break;
1371 /* Continue walking the chain. */
1372 gcc_assert (name != op
1373 && TREE_CODE (name) == SSA_NAME);
1374 stmt = SSA_NAME_DEF_STMT (name);
1375 stmts_to_fix.safe_push (stmt);
1377 while (1);
1379 if (stmts_to_fix.length () > 0 || *def == orig_def)
1380 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
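/* Example of the walk above (a sketch): with the chain
     t = a * x;  d = t * b;
   and OP == x, OPCODE == MULT_EXPR, the walk finds x as an operand of
   t's statement, propagates a into d's use of t, removes t's
   definition, and finally rewrites d with a fresh SSA name (see
   PR72835), so the chain becomes d' = a * b.  */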
1383 /* Returns true if statement S1 dominates statement S2. Like
1384 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1392 SSA_NAME. Assume it lives at the beginning of the function and
1393 thus dominates everything. */
1394 if (!bb1 || s1 == s2)
1395 return true;
1397 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1398 if (!bb2)
1399 return false;
1401 if (bb1 == bb2)
1403 /* PHIs in the same basic block are assumed to be
1404 executed all in parallel; if only one stmt is a PHI,
1405 it dominates the other stmt in the same basic block. */
1406 if (gimple_code (s1) == GIMPLE_PHI)
1407 return true;
1409 if (gimple_code (s2) == GIMPLE_PHI)
1410 return false;
1412 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 if (gimple_uid (s1) < gimple_uid (s2))
1415 return true;
1417 if (gimple_uid (s1) > gimple_uid (s2))
1418 return false;
1420 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421 unsigned int uid = gimple_uid (s1);
1422 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 gimple *s = gsi_stmt (gsi);
1425 if (gimple_uid (s) != uid)
1426 break;
1427 if (s == s2)
1428 return true;
1431 return false;
1434 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1437 /* Insert STMT after INSERT_POINT. */
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 gimple_stmt_iterator gsi;
1443 basic_block bb;
1445 if (gimple_code (insert_point) == GIMPLE_PHI)
1446 bb = gimple_bb (insert_point);
1447 else if (!stmt_ends_bb_p (insert_point))
1449 gsi = gsi_for_stmt (insert_point);
1450 gimple_set_uid (stmt, gimple_uid (insert_point));
1451 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452 return;
1454 else
1455 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456 thus if it must end a basic block, it should be a call that can
1457 throw, or some assignment that can throw. If it throws, the LHS
1458 of it will not be initialized though, so only valid places using
1459 the SSA_NAME should be dominated by the fallthru edge. */
1460 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1461 gsi = gsi_after_labels (bb);
1462 if (gsi_end_p (gsi))
1464 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465 gimple_set_uid (stmt,
1466 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468 else
1469 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474 the result. Places the statement after the definition of either
1475 OP1 or OP2. Returns the new statement. */
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 gimple *op1def = NULL, *op2def = NULL;
1481 gimple_stmt_iterator gsi;
1482 tree op;
1483 gassign *sum;
1485 /* Create the addition statement. */
1486 op = make_ssa_name (type);
1487 sum = gimple_build_assign (op, opcode, op1, op2);
1489 /* Find an insertion place and insert. */
1490 if (TREE_CODE (op1) == SSA_NAME)
1491 op1def = SSA_NAME_DEF_STMT (op1);
1492 if (TREE_CODE (op2) == SSA_NAME)
1493 op2def = SSA_NAME_DEF_STMT (op2);
1494 if ((!op1def || gimple_nop_p (op1def))
1495 && (!op2def || gimple_nop_p (op2def)))
1497 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498 if (gsi_end_p (gsi))
1500 gimple_stmt_iterator gsi2
1501 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1502 gimple_set_uid (sum,
1503 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 else
1506 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1507 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509 else
1511 gimple *insert_point;
1512 if ((!op1def || gimple_nop_p (op1def))
1513 || (op2def && !gimple_nop_p (op2def)
1514 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1515 insert_point = op2def;
1516 else
1517 insert_point = op1def;
1518 insert_stmt_after (sum, insert_point);
1520 update_stmt (sum);
1522 return sum;
1525 /* Perform un-distribution of divisions and multiplications.
1526 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1527 to (A + B) / X for real X.
1529 The algorithm is organized as follows.
1531 - First we walk the addition chain *OPS looking for summands that
1532 are defined by a multiplication or a real division. This results
1533 in the candidates bitmap with relevant indices into *OPS.
1535 - Second we build the chains of multiplications or divisions for
1536 these candidates, counting the number of occurrences of (operand, code)
1537 pairs in all of the candidates chains.
1539 - Third we sort the (operand, code) pairs by number of occurrence and
1540 process them starting with the pair with the most uses.
1542 * For each such pair we walk the candidates again to build a
1543 second candidate bitmap noting all multiplication/division chains
1544 that have at least one occurrence of (operand, code).
1546 * We build an alternate addition chain only covering these
1547 candidates with one (operand, code) operation removed from their
1548 multiplication/division chain.
1550 * The first candidate gets replaced by the alternate addition chain
1551 multiplied/divided by the operand.
1553 * All candidate chains get disabled for further processing and
1554 processing of (operand, code) pairs continues.
1556 The alternate addition chains built are re-processed by the main
1557 reassociation algorithm which allows optimizing a * x * y + b * y * x
1558 to (a + b) * x * y in one invocation of the reassociation pass. */
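/* A small illustrative trace (not actual pass output): for the chain
     s = a * x + b * x + c * y
   the pair (x, MULT_EXPR) occurs twice, so the first two summands are
   stripped of x via zero_one_operation and summed via build_and_add_sum:
     t = a + b;  p = t * x;
   The addition chain then becomes p + 0 + c * y, which the main
   reassociation pass simplifies further.  */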
1560 static bool
1561 undistribute_ops_list (enum tree_code opcode,
1562 vec<operand_entry *> *ops, class loop *loop)
1564 unsigned int length = ops->length ();
1565 operand_entry *oe1;
1566 unsigned i, j;
1567 unsigned nr_candidates, nr_candidates2;
1568 sbitmap_iterator sbi0;
1569 vec<operand_entry *> *subops;
1570 bool changed = false;
1571 unsigned int next_oecount_id = 0;
1573 if (length <= 1
1574 || opcode != PLUS_EXPR)
1575 return false;
1577 /* Build a list of candidates to process. */
1578 auto_sbitmap candidates (length);
1579 bitmap_clear (candidates);
1580 nr_candidates = 0;
1581 FOR_EACH_VEC_ELT (*ops, i, oe1)
1583 enum tree_code dcode;
1584 gimple *oe1def;
1586 if (TREE_CODE (oe1->op) != SSA_NAME)
1587 continue;
1588 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1589 if (!is_gimple_assign (oe1def))
1590 continue;
1591 dcode = gimple_assign_rhs_code (oe1def);
1592 if ((dcode != MULT_EXPR
1593 && dcode != RDIV_EXPR)
1594 || !is_reassociable_op (oe1def, dcode, loop))
1595 continue;
1597 bitmap_set_bit (candidates, i);
1598 nr_candidates++;
1601 if (nr_candidates < 2)
1602 return false;
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, "searching for un-distribute opportunities ");
1607 print_generic_expr (dump_file,
1608 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1609 fprintf (dump_file, " %d\n", nr_candidates);
1612 /* Build linearized sub-operand lists and the counting table. */
1613 cvec.create (0);
1615 hash_table<oecount_hasher> ctable (15);
1617 /* ??? Macro arguments cannot have multi-argument template types in
1618 them. This typedef is needed to work around that limitation. */
1619 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1620 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1621 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623 gimple *oedef;
1624 enum tree_code oecode;
1625 unsigned j;
1627 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1628 oecode = gimple_assign_rhs_code (oedef);
1629 linearize_expr_tree (&subops[i], oedef,
1630 associative_tree_code (oecode), false);
1632 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 oecount c;
1635 int *slot;
1636 int idx;
1637 c.oecode = oecode;
1638 c.cnt = 1;
1639 c.id = next_oecount_id++;
1640 c.op = oe1->op;
1641 cvec.safe_push (c);
1642 idx = cvec.length () + 41;
1643 slot = ctable.find_slot (idx, INSERT);
1644 if (!*slot)
1646 *slot = idx;
1648 else
1650 cvec.pop ();
1651 cvec[*slot - 42].cnt++;
1656 /* Sort the counting table. */
1657 cvec.qsort (oecount_cmp);
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 oecount *c;
1662 fprintf (dump_file, "Candidates:\n");
1663 FOR_EACH_VEC_ELT (cvec, j, c)
1665 fprintf (dump_file, " %u %s: ", c->cnt,
1666 c->oecode == MULT_EXPR
1667 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1668 print_generic_expr (dump_file, c->op);
1669 fprintf (dump_file, "\n");
1673 /* Process the (operand, code) pairs in order of most occurrence. */
1674 auto_sbitmap candidates2 (length);
1675 while (!cvec.is_empty ())
1677 oecount *c = &cvec.last ();
1678 if (c->cnt < 2)
1679 break;
1681 /* Now collect the operands in the outer chain that contain
1682 the common operand in their inner chain. */
1683 bitmap_clear (candidates2);
1684 nr_candidates2 = 0;
1685 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 gimple *oedef;
1688 enum tree_code oecode;
1689 unsigned j;
1690 tree op = (*ops)[i]->op;
1692 /* If we undistributed in this chain already this may be
1693 a constant. */
1694 if (TREE_CODE (op) != SSA_NAME)
1695 continue;
1697 oedef = SSA_NAME_DEF_STMT (op);
1698 oecode = gimple_assign_rhs_code (oedef);
1699 if (oecode != c->oecode)
1700 continue;
1702 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 if (oe1->op == c->op)
1706 bitmap_set_bit (candidates2, i);
1707 ++nr_candidates2;
1708 break;
1713 if (nr_candidates2 >= 2)
1715 operand_entry *oe1, *oe2;
1716 gimple *prod;
1717 int first = bitmap_first_set_bit (candidates2);
1719 /* Build the new addition chain. */
1720 oe1 = (*ops)[first];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Building (");
1724 print_generic_expr (dump_file, oe1->op);
1726 zero_one_operation (&oe1->op, c->oecode, c->op);
1727 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 gimple *sum;
1730 oe2 = (*ops)[i];
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1733 fprintf (dump_file, " + ");
1734 print_generic_expr (dump_file, oe2->op);
1736 zero_one_operation (&oe2->op, c->oecode, c->op);
1737 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1738 oe1->op, oe2->op, opcode);
1739 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1740 oe2->rank = 0;
1741 oe1->op = gimple_get_lhs (sum);
1744 /* Apply the multiplication/division. */
1745 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1746 oe1->op, c->op, c->oecode);
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1750 print_generic_expr (dump_file, c->op);
1751 fprintf (dump_file, "\n");
1754 /* Record it in the addition chain and disable further
1755 undistribution with this op. */
1756 oe1->op = gimple_assign_lhs (prod);
1757 oe1->rank = get_rank (oe1->op);
1758 subops[first].release ();
1760 changed = true;
1763 cvec.pop ();
1766 for (i = 0; i < ops->length (); ++i)
1767 subops[i].release ();
1768 free (subops);
1769 cvec.release ();
1771 return changed;
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1775 first: element index for each relevant BIT_FIELD_REF.
1776 second: the index into the *OPS vector for each relevant BIT_FIELD_REF. */
1777 typedef std::pair<unsigned, unsigned> v_info_elem;
1778 struct v_info {
1779 tree vec_type;
1780 auto_vec<v_info_elem, 32> vec;
1782 typedef v_info *v_info_ptr;
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1785 static int
1786 sort_by_mach_mode (const void *p_i, const void *p_j)
1788 const tree tr1 = *((const tree *) p_i);
1789 const tree tr2 = *((const tree *) p_j);
1790 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1791 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1792 if (mode1 > mode2)
1793 return 1;
1794 else if (mode1 < mode2)
1795 return -1;
1796 else
1797 return 0;
1800 /* Cleanup hash map for VECTOR information. */
1801 static void
1802 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1804 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1805 it != info_map.end (); ++it)
1807 v_info_ptr info = (*it).second;
1808 delete info;
1809 (*it).second = NULL;
1813 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1814 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1815 is transformed to
1816 Vs = (V1 + V2 + ... + Vn)
1817 Vs[0] + Vs[1] + ... + Vs[k]
1819 The basic steps are listed below:
1821 1) Scan the addition chain *OPS for summands that come from a
1822 BIT_FIELD_REF of a VECTOR-typed SSA_NAME. Put the information into
1823 v_info_map for each such summand, using the VECTOR SSA_NAME as key.
1825 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1826 contiguous and cover the whole VECTOR perfectly, without any holes.
1827 Obtain one VECTOR list containing the candidates to be transformed.
1829 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1830 candidates with same mode, build the addition statements for them and
1831 generate BIT_FIELD_REFs accordingly.
1833 TODO:
1834 The current implementation requires that each VECTOR be fully
1835 covered, but it could be extended to support partial coverage
1836 (adjacent elements that do not fill the whole VECTOR); that would
1837 likely need a cost model to decide when it is worthwhile. */
1839 static bool
1840 undistribute_bitref_for_vector (enum tree_code opcode,
1841 vec<operand_entry *> *ops, struct loop *loop)
1843 if (ops->length () <= 1)
1844 return false;
1846 if (opcode != PLUS_EXPR
1847 && opcode != MULT_EXPR
1848 && opcode != BIT_XOR_EXPR
1849 && opcode != BIT_IOR_EXPR
1850 && opcode != BIT_AND_EXPR)
1851 return false;
1853 hash_map<tree, v_info_ptr> v_info_map;
1854 operand_entry *oe1;
1855 unsigned i;
1857 /* Find the summands in the addition chain that come from a VECTOR
1858 BIT_FIELD_REF, and put the information into the map. */
1859 FOR_EACH_VEC_ELT (*ops, i, oe1)
1861 enum tree_code dcode;
1862 gimple *oe1def;
1864 if (TREE_CODE (oe1->op) != SSA_NAME)
1865 continue;
1866 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1867 if (!is_gimple_assign (oe1def))
1868 continue;
1869 dcode = gimple_assign_rhs_code (oe1def);
1870 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1871 continue;
1873 tree rhs = gimple_assign_rhs1 (oe1def);
1874 tree vec = TREE_OPERAND (rhs, 0);
1875 tree vec_type = TREE_TYPE (vec);
1877 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1878 continue;
1880 /* Ignore it if target machine can't support this VECTOR type. */
1881 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1882 continue;
1884 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1885 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1886 continue;
1888 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1889 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1890 continue;
1892 /* The type of BIT_FIELD_REF might not be equal to the element type of
1893 the vector. We want to use a vector type with element type the
1894 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1895 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1897 machine_mode simd_mode;
1898 unsigned HOST_WIDE_INT size, nunits;
1899 unsigned HOST_WIDE_INT elem_size
1900 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1901 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1902 continue;
1903 if (size <= elem_size || (size % elem_size) != 0)
1904 continue;
1905 nunits = size / elem_size;
1906 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1907 nunits).exists (&simd_mode))
1908 continue;
1909 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1911 /* Ignore it if target machine can't support this VECTOR type. */
1912 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1913 continue;
1915 /* Check const vector type, constrain BIT_FIELD_REF offset and
1916 size. */
1917 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1918 continue;
1920 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1921 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1922 continue;
1925 tree elem_type = TREE_TYPE (vec_type);
1926 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1927 if (maybe_ne (bit_field_size (rhs), elem_size))
1928 continue;
1930 unsigned idx;
1931 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1932 continue;
1934 /* Ignore it if target machine can't support this type of VECTOR
1935 operation. */
1936 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1937 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1938 continue;
1940 bool existed;
1941 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1942 if (!existed)
1944 info = new v_info;
1945 info->vec_type = vec_type;
1947 else if (!types_compatible_p (vec_type, info->vec_type))
1948 continue;
1949 info->vec.safe_push (std::make_pair (idx, i));
1952 /* At least two VECTOR to combine. */
1953 if (v_info_map.elements () <= 1)
1955 cleanup_vinfo_map (v_info_map);
1956 return false;
1959 /* Verify all VECTOR candidates by checking two conditions:
1960 1) sorted offsets are adjacent, no holes.
1961 2) can fill the whole VECTOR perfectly.
1962 And add the valid candidates to a vector for further handling. */
1963 auto_vec<tree> valid_vecs (v_info_map.elements ());
1964 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1965 it != v_info_map.end (); ++it)
1967 tree cand_vec = (*it).first;
1968 v_info_ptr cand_info = (*it).second;
1969 unsigned int num_elems
1970 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1971 if (cand_info->vec.length () != num_elems)
1972 continue;
1973 sbitmap holes = sbitmap_alloc (num_elems);
1974 bitmap_ones (holes);
1975 bool valid = true;
1976 v_info_elem *curr;
1977 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1979 if (!bitmap_bit_p (holes, curr->first))
1981 valid = false;
1982 break;
1984 else
1985 bitmap_clear_bit (holes, curr->first);
1987 if (valid && bitmap_empty_p (holes))
1988 valid_vecs.quick_push (cand_vec);
1989 sbitmap_free (holes);
1992 /* We need at least two VECTORs to combine. */
1993 if (valid_vecs.length () <= 1)
1995 cleanup_vinfo_map (v_info_map);
1996 return false;
1999 valid_vecs.qsort (sort_by_mach_mode);
2000 /* Go through all candidates in machine mode order; after sorting, candidates
2001 with the same mode are adjacent, and any mode with only a single candidate is skipped. */
2002 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2004 tree tvec = valid_vecs[i];
2005 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2007 /* Skip modes with only a single candidate. */
2008 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2009 continue;
2011 unsigned int idx, j;
2012 gimple *sum = NULL;
2013 tree sum_vec = tvec;
2014 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2015 v_info_elem *elem;
2016 tree vec_type = info_ptr->vec_type;
2018 /* Build the sum for all candidates with the same mode. */
2021 sum = build_and_add_sum (vec_type, sum_vec,
2022 valid_vecs[i + 1], opcode);
2023 if (!useless_type_conversion_p (vec_type,
2024 TREE_TYPE (valid_vecs[i + 1])))
2026 /* Update the operands only after build_and_add_sum,
2027 so that we don't have to repeat the placement algorithm
2028 of build_and_add_sum. */
2029 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2030 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2031 valid_vecs[i + 1]);
2032 tree lhs = make_ssa_name (vec_type);
2033 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2034 gimple_set_uid (g, gimple_uid (sum));
2035 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2036 gimple_assign_set_rhs2 (sum, lhs);
2037 if (sum_vec == tvec)
2039 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2040 lhs = make_ssa_name (vec_type);
2041 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2042 gimple_set_uid (g, gimple_uid (sum));
2043 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2044 gimple_assign_set_rhs1 (sum, lhs);
2046 update_stmt (sum);
2048 sum_vec = gimple_get_lhs (sum);
2049 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2050 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2051 /* Update the related ops of the current candidate VECTOR. */
2052 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2054 idx = elem->second;
2055 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2056 /* Set this so that the op definition will get DCEd later. */
2057 gimple_set_visited (def, true);
2058 if (opcode == PLUS_EXPR
2059 || opcode == BIT_XOR_EXPR
2060 || opcode == BIT_IOR_EXPR)
2061 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2062 else if (opcode == MULT_EXPR)
2063 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2064 else
2066 gcc_assert (opcode == BIT_AND_EXPR);
2067 (*ops)[idx]->op
2068 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2070 (*ops)[idx]->rank = 0;
2072 if (dump_file && (dump_flags & TDF_DETAILS))
2074 fprintf (dump_file, "Generating addition -> ");
2075 print_gimple_stmt (dump_file, sum, 0);
2077 i++;
2079 while ((i < valid_vecs.length () - 1)
2080 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2082 /* Referring to the first valid VECTOR with this mode, generate the
2083 BIT_FIELD_REF statements accordingly. */
2084 info_ptr = *(v_info_map.get (tvec));
2085 gcc_assert (sum);
2086 tree elem_type = TREE_TYPE (vec_type);
2087 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2089 idx = elem->second;
2090 tree dst = make_ssa_name (elem_type);
2091 tree pos = bitsize_int (elem->first
2092 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2093 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2094 TYPE_SIZE (elem_type), pos);
2095 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2096 insert_stmt_after (gs, sum);
2097 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2098 /* Set this so that the op definition will get DCEd later. */
2099 gimple_set_visited (def, true);
2100 (*ops)[idx]->op = gimple_assign_lhs (gs);
2101 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2104 fprintf (dump_file, "Generating bit_field_ref -> ");
2105 print_gimple_stmt (dump_file, gs, 0);
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2113 cleanup_vinfo_map (v_info_map);
2115 return true;
2118 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2119 expression, examine the other OPS to see if any of them are comparisons
2120 of the same values, which we may be able to combine or eliminate.
2121 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2123 static bool
2124 eliminate_redundant_comparison (enum tree_code opcode,
2125 vec<operand_entry *> *ops,
2126 unsigned int currindex,
2127 operand_entry *curr)
2129 tree op1, op2;
2130 enum tree_code lcode, rcode;
2131 gimple *def1, *def2;
2132 int i;
2133 operand_entry *oe;
2135 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2136 return false;
2138 /* Check that CURR is a comparison. */
2139 if (TREE_CODE (curr->op) != SSA_NAME)
2140 return false;
2141 def1 = SSA_NAME_DEF_STMT (curr->op);
2142 if (!is_gimple_assign (def1))
2143 return false;
2144 lcode = gimple_assign_rhs_code (def1);
2145 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2146 return false;
2147 op1 = gimple_assign_rhs1 (def1);
2148 op2 = gimple_assign_rhs2 (def1);
2150 /* Now look for a similar comparison in the remaining OPS. */
2151 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2153 tree t;
2155 if (TREE_CODE (oe->op) != SSA_NAME)
2156 continue;
2157 def2 = SSA_NAME_DEF_STMT (oe->op);
2158 if (!is_gimple_assign (def2))
2159 continue;
2160 rcode = gimple_assign_rhs_code (def2);
2161 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2162 continue;
2164 /* If we got here, we have a match. See if we can combine the
2165 two comparisons. */
2166 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2167 if (opcode == BIT_IOR_EXPR)
2168 t = maybe_fold_or_comparisons (type,
2169 lcode, op1, op2,
2170 rcode, gimple_assign_rhs1 (def2),
2171 gimple_assign_rhs2 (def2));
2172 else
2173 t = maybe_fold_and_comparisons (type,
2174 lcode, op1, op2,
2175 rcode, gimple_assign_rhs1 (def2),
2176 gimple_assign_rhs2 (def2));
2177 if (!t)
2178 continue;
2180 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2181 always give us a boolean_type_node value back. If the original
2182 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2183 we need to convert. */
2184 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2185 t = fold_convert (TREE_TYPE (curr->op), t);
2187 if (TREE_CODE (t) != INTEGER_CST
2188 && !operand_equal_p (t, curr->op, 0))
2190 enum tree_code subcode;
2191 tree newop1, newop2;
2192 if (!COMPARISON_CLASS_P (t))
2193 continue;
2194 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2195 STRIP_USELESS_TYPE_CONVERSION (newop1);
2196 STRIP_USELESS_TYPE_CONVERSION (newop2);
2197 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2198 continue;
2201 if (dump_file && (dump_flags & TDF_DETAILS))
2203 fprintf (dump_file, "Equivalence: ");
2204 print_generic_expr (dump_file, curr->op);
2205 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2206 print_generic_expr (dump_file, oe->op);
2207 fprintf (dump_file, " -> ");
2208 print_generic_expr (dump_file, t);
2209 fprintf (dump_file, "\n");
2212 /* Now we can delete oe, as it has been subsumed by the new combined
2213 expression t. */
2214 ops->ordered_remove (i);
2215 reassociate_stats.ops_eliminated ++;
2217 /* If t is the same as curr->op, we're done. Otherwise we must
2218 replace curr->op with t. Special case is if we got a constant
2219 back, in which case we add it to the end instead of in place of
2220 the current entry. */
2221 if (TREE_CODE (t) == INTEGER_CST)
2223 ops->ordered_remove (currindex);
2224 add_to_ops_vec (ops, t);
2226 else if (!operand_equal_p (t, curr->op, 0))
2228 gimple *sum;
2229 enum tree_code subcode;
2230 tree newop1;
2231 tree newop2;
2232 gcc_assert (COMPARISON_CLASS_P (t));
2233 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2234 STRIP_USELESS_TYPE_CONVERSION (newop1);
2235 STRIP_USELESS_TYPE_CONVERSION (newop2);
2236 gcc_checking_assert (is_gimple_val (newop1)
2237 && is_gimple_val (newop2));
2238 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2239 curr->op = gimple_get_lhs (sum);
2241 return true;
2244 return false;
2248 /* Transform repeated addition of the same value into multiplication
2249 by a constant. */
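/* For illustration (hypothetical SSA names, not actual compiler output):
   if a PLUS_EXPR operand list contains { x_1, x_1, x_1, y_2 }, the three
   occurrences of x_1 are removed and replaced by a single new operand t_3
   defined by
     t_3 = x_1 * 3;
   leaving { y_2, t_3 }.  For floating-point operands this is only done
   when flag_unsafe_math_optimizations is set, as checked below.  */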
2250 static bool
2251 transform_add_to_multiply (vec<operand_entry *> *ops)
2253 operand_entry *oe;
2254 tree op = NULL_TREE;
2255 int j;
2256 int i, start = -1, end = 0, count = 0;
2257 auto_vec<std::pair <int, int> > indxs;
2258 bool changed = false;
2260 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2261 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2262 || !flag_unsafe_math_optimizations))
2263 return false;
2265 /* Look for repeated operands. */
2266 FOR_EACH_VEC_ELT (*ops, i, oe)
2268 if (start == -1)
2270 count = 1;
2271 op = oe->op;
2272 start = i;
2274 else if (operand_equal_p (oe->op, op, 0))
2276 count++;
2277 end = i;
2279 else
2281 if (count > 1)
2282 indxs.safe_push (std::make_pair (start, end));
2283 count = 1;
2284 op = oe->op;
2285 start = i;
2289 if (count > 1)
2290 indxs.safe_push (std::make_pair (start, end));
2292 for (j = indxs.length () - 1; j >= 0; --j)
2294 /* Convert repeated operand addition to multiplication. */
2295 start = indxs[j].first;
2296 end = indxs[j].second;
2297 op = (*ops)[start]->op;
2298 count = end - start + 1;
2299 for (i = end; i >= start; --i)
2300 ops->unordered_remove (i);
2301 tree tmp = make_ssa_name (TREE_TYPE (op));
2302 tree cst = build_int_cst (integer_type_node, count);
2303 gassign *mul_stmt
2304 = gimple_build_assign (tmp, MULT_EXPR,
2305 op, fold_convert (TREE_TYPE (op), cst));
2306 gimple_set_visited (mul_stmt, true);
2307 add_to_ops_vec (ops, tmp, mul_stmt);
2308 changed = true;
2311 return changed;
2315 /* Perform various identities and other optimizations on the list of
2316 operand entries, stored in OPS. The tree code for the binary
2317 operation between all the operands is OPCODE. */
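/* For illustration (hypothetical operands): with OPCODE PLUS_EXPR and
   OPS ending in two constants, e.g. { a_1, b_2, 3, 5 }, the two constants
   are popped and folded into 8, giving { a_1, b_2, 8 }, and the function
   recurses on the shortened list to look for further opportunities.  */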
2319 static void
2320 optimize_ops_list (enum tree_code opcode,
2321 vec<operand_entry *> *ops)
2323 unsigned int length = ops->length ();
2324 unsigned int i;
2325 operand_entry *oe;
2326 operand_entry *oelast = NULL;
2327 bool iterate = false;
2329 if (length == 1)
2330 return;
2332 oelast = ops->last ();
2334 /* If the last two are constants, pop the constants off, merge them
2335 and try the next two. */
2336 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2338 operand_entry *oelm1 = (*ops)[length - 2];
2340 if (oelm1->rank == 0
2341 && is_gimple_min_invariant (oelm1->op)
2342 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2343 TREE_TYPE (oelast->op)))
2345 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2346 oelm1->op, oelast->op);
2348 if (folded && is_gimple_min_invariant (folded))
2350 if (dump_file && (dump_flags & TDF_DETAILS))
2351 fprintf (dump_file, "Merging constants\n");
2353 ops->pop ();
2354 ops->pop ();
2356 add_to_ops_vec (ops, folded);
2357 reassociate_stats.constants_eliminated++;
2359 optimize_ops_list (opcode, ops);
2360 return;
2365 eliminate_using_constants (opcode, ops);
2366 oelast = NULL;
2368 for (i = 0; ops->iterate (i, &oe);)
2370 bool done = false;
2372 if (eliminate_not_pairs (opcode, ops, i, oe))
2373 return;
2374 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2375 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2376 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2378 if (done)
2379 return;
2380 iterate = true;
2381 oelast = NULL;
2382 continue;
2384 oelast = oe;
2385 i++;
2388 if (iterate)
2389 optimize_ops_list (opcode, ops);
2392 /* The following functions are subroutines to optimize_range_tests and allow
2393 it to try to change a logical combination of comparisons into a range
2394 test.
2396 For example, both
2397 X == 2 || X == 5 || X == 3 || X == 4
2399 X >= 2 && X <= 5
2400 are converted to
2401 (unsigned) (X - 2) <= 3
2403 For more information see comments above fold_test_range in fold-const.c,
2404 this implementation is for GIMPLE. */
2406 struct range_entry
2408 tree exp;
2409 tree low;
2410 tree high;
2411 bool in_p;
2412 bool strict_overflow_p;
2413 unsigned int idx, next;
2416 /* This is similar to make_range in fold-const.c, but on top of
2417 GIMPLE instead of trees. If EXP is non-NULL, it should be
2418 an SSA_NAME and STMT argument is ignored, otherwise STMT
2419 argument should be a GIMPLE_COND. */
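/* For illustration (hypothetical SSA names): if EXP is _1 defined by
     _1 = x_2 == 3;
   the resulting entry is exp = x_2, in_p = true, low = high = 3,
   i.e. the range +[3, 3].  If instead
     _1 = x_2 > 5;
   the entry is exp = x_2, in_p = false, low = NULL_TREE (minimum),
   high = 5, i.e. x_2 lies outside [min, 5].  */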
2421 static void
2422 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2424 int in_p;
2425 tree low, high;
2426 bool is_bool, strict_overflow_p;
2428 r->exp = NULL_TREE;
2429 r->in_p = false;
2430 r->strict_overflow_p = false;
2431 r->low = NULL_TREE;
2432 r->high = NULL_TREE;
2433 if (exp != NULL_TREE
2434 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2435 return;
2437 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2438 and see if we can refine the range. Some of the cases below may not
2439 happen, but it doesn't seem worth worrying about this. We "continue"
2440 the outer loop when we've changed something; otherwise we "break"
2441 the switch, which will "break" the while. */
2442 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2443 high = low;
2444 in_p = 0;
2445 strict_overflow_p = false;
2446 is_bool = false;
2447 if (exp == NULL_TREE)
2448 is_bool = true;
2449 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2451 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2452 is_bool = true;
2453 else
2454 return;
2456 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2457 is_bool = true;
2459 while (1)
2461 enum tree_code code;
2462 tree arg0, arg1, exp_type;
2463 tree nexp;
2464 location_t loc;
2466 if (exp != NULL_TREE)
2468 if (TREE_CODE (exp) != SSA_NAME
2469 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2470 break;
2472 stmt = SSA_NAME_DEF_STMT (exp);
2473 if (!is_gimple_assign (stmt))
2474 break;
2476 code = gimple_assign_rhs_code (stmt);
2477 arg0 = gimple_assign_rhs1 (stmt);
2478 arg1 = gimple_assign_rhs2 (stmt);
2479 exp_type = TREE_TYPE (exp);
2481 else
2483 code = gimple_cond_code (stmt);
2484 arg0 = gimple_cond_lhs (stmt);
2485 arg1 = gimple_cond_rhs (stmt);
2486 exp_type = boolean_type_node;
2489 if (TREE_CODE (arg0) != SSA_NAME
2490 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2491 break;
2492 loc = gimple_location (stmt);
2493 switch (code)
2495 case BIT_NOT_EXPR:
2496 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2497 /* Ensure the range is either +[-,0], +[0,0],
2498 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2499 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2500 or similar expression of unconditional true or
2501 false, it should not be negated. */
2502 && ((high && integer_zerop (high))
2503 || (low && integer_onep (low))))
2505 in_p = !in_p;
2506 exp = arg0;
2507 continue;
2509 break;
2510 case SSA_NAME:
2511 exp = arg0;
2512 continue;
2513 CASE_CONVERT:
2514 if (is_bool)
2516 if ((TYPE_PRECISION (exp_type) == 1
2517 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2518 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2519 return;
2521 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2523 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2524 is_bool = true;
2525 else
2526 return;
2528 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2529 is_bool = true;
2530 goto do_default;
2531 case EQ_EXPR:
2532 case NE_EXPR:
2533 case LT_EXPR:
2534 case LE_EXPR:
2535 case GE_EXPR:
2536 case GT_EXPR:
2537 is_bool = true;
2538 /* FALLTHRU */
2539 default:
2540 if (!is_bool)
2541 return;
2542 do_default:
2543 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2544 &low, &high, &in_p,
2545 &strict_overflow_p);
2546 if (nexp != NULL_TREE)
2548 exp = nexp;
2549 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2550 continue;
2552 break;
2554 break;
2556 if (is_bool)
2558 r->exp = exp;
2559 r->in_p = in_p;
2560 r->low = low;
2561 r->high = high;
2562 r->strict_overflow_p = strict_overflow_p;
2566 /* Comparison function for qsort. Sort entries
2567 without SSA_NAME exp first, then with SSA_NAMEs sorted
2568 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2569 by increasing ->low and if ->low is the same, by increasing
2570 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2571 maximum. */
2573 static int
2574 range_entry_cmp (const void *a, const void *b)
2576 const struct range_entry *p = (const struct range_entry *) a;
2577 const struct range_entry *q = (const struct range_entry *) b;
2579 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2581 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2583 /* Group range_entries for the same SSA_NAME together. */
2584 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2585 return -1;
2586 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2587 return 1;
2588 /* If ->low is different, NULL low goes first, then by
2589 ascending low. */
2590 if (p->low != NULL_TREE)
2592 if (q->low != NULL_TREE)
2594 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2595 p->low, q->low);
2596 if (tem && integer_onep (tem))
2597 return -1;
2598 tem = fold_binary (GT_EXPR, boolean_type_node,
2599 p->low, q->low);
2600 if (tem && integer_onep (tem))
2601 return 1;
2603 else
2604 return 1;
2606 else if (q->low != NULL_TREE)
2607 return -1;
2608 /* If ->high is different, NULL high goes last, before that by
2609 ascending high. */
2610 if (p->high != NULL_TREE)
2612 if (q->high != NULL_TREE)
2614 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2615 p->high, q->high);
2616 if (tem && integer_onep (tem))
2617 return -1;
2618 tem = fold_binary (GT_EXPR, boolean_type_node,
2619 p->high, q->high);
2620 if (tem && integer_onep (tem))
2621 return 1;
2623 else
2624 return -1;
2626 else if (q->high != NULL_TREE)
2627 return 1;
2628 /* If both ranges are the same, sort below by ascending idx. */
2630 else
2631 return 1;
2633 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2634 return -1;
2636 if (p->idx < q->idx)
2637 return -1;
2638 else
2640 gcc_checking_assert (p->idx > q->idx);
2641 return 1;
2645 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2646 insert needed statements BEFORE or after GSI. */
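/* For illustration: if EXPR gimplifies to something that is a valid
   gimple value but not an SSA_NAME, e.g. the constant 1, an extra copy
   such as
     _7 = 1;
   (hypothetical name) is inserted at GSI and its lhs is returned.  */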
2648 static tree
2649 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2651 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2652 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2653 if (TREE_CODE (ret) != SSA_NAME)
2655 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2656 if (before)
2657 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2658 else
2659 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2660 ret = gimple_assign_lhs (g);
2662 return ret;
2665 /* Helper routine of optimize_range_test.
2666 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2667 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2668 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2669 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2670 an array of COUNT pointers to other ranges. Return
2671 true if the range merge has been successful.
2672 If OPCODE is ERROR_MARK, this is called from within
2673 maybe_optimize_range_tests and is performing inter-bb range optimization.
2674 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2675 oe->rank. */
2677 static bool
2678 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2679 struct range_entry **otherrangep,
2680 unsigned int count, enum tree_code opcode,
2681 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2682 bool in_p, tree low, tree high, bool strict_overflow_p)
2684 operand_entry *oe = (*ops)[range->idx];
2685 tree op = oe->op;
2686 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2687 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2688 location_t loc = gimple_location (stmt);
2689 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2690 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2691 in_p, low, high);
2692 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2693 gimple_stmt_iterator gsi;
2694 unsigned int i, uid;
2696 if (tem == NULL_TREE)
2697 return false;
2699 /* If op is default def SSA_NAME, there is no place to insert the
2700 new comparison. Give up, unless we can use OP itself as the
2701 range test. */
2702 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2704 if (op == range->exp
2705 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2706 || TREE_CODE (optype) == BOOLEAN_TYPE)
2707 && (op == tem
2708 || (TREE_CODE (tem) == EQ_EXPR
2709 && TREE_OPERAND (tem, 0) == op
2710 && integer_onep (TREE_OPERAND (tem, 1))))
2711 && opcode != BIT_IOR_EXPR
2712 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2714 stmt = NULL;
2715 tem = op;
2717 else
2718 return false;
2721 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2722 warning_at (loc, OPT_Wstrict_overflow,
2723 "assuming signed overflow does not occur "
2724 "when simplifying range test");
2726 if (dump_file && (dump_flags & TDF_DETAILS))
2728 struct range_entry *r;
2729 fprintf (dump_file, "Optimizing range tests ");
2730 print_generic_expr (dump_file, range->exp);
2731 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2732 print_generic_expr (dump_file, range->low);
2733 fprintf (dump_file, ", ");
2734 print_generic_expr (dump_file, range->high);
2735 fprintf (dump_file, "]");
2736 for (i = 0; i < count; i++)
2738 if (otherrange)
2739 r = otherrange + i;
2740 else
2741 r = otherrangep[i];
2742 if (r->exp
2743 && r->exp != range->exp
2744 && TREE_CODE (r->exp) == SSA_NAME)
2746 fprintf (dump_file, " and ");
2747 print_generic_expr (dump_file, r->exp);
2749 else
2750 fprintf (dump_file, " and");
2751 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2752 print_generic_expr (dump_file, r->low);
2753 fprintf (dump_file, ", ");
2754 print_generic_expr (dump_file, r->high);
2755 fprintf (dump_file, "]");
2757 fprintf (dump_file, "\n into ");
2758 print_generic_expr (dump_file, tem);
2759 fprintf (dump_file, "\n");
2762 if (opcode == BIT_IOR_EXPR
2763 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2764 tem = invert_truthvalue_loc (loc, tem);
2766 tem = fold_convert_loc (loc, optype, tem);
2767 if (stmt)
2769 gsi = gsi_for_stmt (stmt);
2770 uid = gimple_uid (stmt);
2772 else
2774 gsi = gsi_none ();
2775 uid = 0;
2777 if (stmt == NULL)
2778 gcc_checking_assert (tem == op);
2779 /* In rare cases range->exp can be equal to lhs of stmt.
2780 In that case we have to insert after the stmt rather than before
2781 it. If stmt is a PHI, insert it at the start of the basic block. */
2782 else if (op != range->exp)
2784 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2785 tem = force_into_ssa_name (&gsi, tem, true);
2786 gsi_prev (&gsi);
2788 else if (gimple_code (stmt) != GIMPLE_PHI)
2790 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2791 tem = force_into_ssa_name (&gsi, tem, false);
2793 else
2795 gsi = gsi_after_labels (gimple_bb (stmt));
2796 if (!gsi_end_p (gsi))
2797 uid = gimple_uid (gsi_stmt (gsi));
2798 else
2800 gsi = gsi_start_bb (gimple_bb (stmt));
2801 uid = 1;
2802 while (!gsi_end_p (gsi))
2804 uid = gimple_uid (gsi_stmt (gsi));
2805 gsi_next (&gsi);
2808 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2809 tem = force_into_ssa_name (&gsi, tem, true);
2810 if (gsi_end_p (gsi))
2811 gsi = gsi_last_bb (gimple_bb (stmt));
2812 else
2813 gsi_prev (&gsi);
2815 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2816 if (gimple_uid (gsi_stmt (gsi)))
2817 break;
2818 else
2819 gimple_set_uid (gsi_stmt (gsi), uid);
2821 oe->op = tem;
2822 range->exp = exp;
2823 range->low = low;
2824 range->high = high;
2825 range->in_p = in_p;
2826 range->strict_overflow_p = false;
2828 for (i = 0; i < count; i++)
2830 if (otherrange)
2831 range = otherrange + i;
2832 else
2833 range = otherrangep[i];
2834 oe = (*ops)[range->idx];
2835 /* Now change all the other range test immediate uses, so that
2836 those tests will be optimized away. */
2837 if (opcode == ERROR_MARK)
2839 if (oe->op)
2840 oe->op = build_int_cst (TREE_TYPE (oe->op),
2841 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2842 else
2843 oe->op = (oe->rank == BIT_IOR_EXPR
2844 ? boolean_false_node : boolean_true_node);
2846 else
2847 oe->op = error_mark_node;
2848 range->exp = NULL_TREE;
2849 range->low = NULL_TREE;
2850 range->high = NULL_TREE;
2852 return true;
2855 /* Optimize X == CST1 || X == CST2
2856 if popcount (CST1 ^ CST2) == 1 into
2857 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2858 Similarly for ranges. E.g.
2859 X != 2 && X != 3 && X != 10 && X != 11
2860 will be transformed by the previous optimization into
2861 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2862 and this loop can transform that into
2863 !(((X & ~8) - 2U) <= 1U). */
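/* A worked instance (illustrative): for X == 43 || X == 47 we have
   43 ^ 47 == 4, which has a single bit set, so the two tests combine
   into (X & ~4) == (43 & ~4), i.e. (X & ~4) == 43.  */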
2865 static bool
2866 optimize_range_tests_xor (enum tree_code opcode, tree type,
2867 tree lowi, tree lowj, tree highi, tree highj,
2868 vec<operand_entry *> *ops,
2869 struct range_entry *rangei,
2870 struct range_entry *rangej)
2872 tree lowxor, highxor, tem, exp;
2873 /* Check lowi ^ lowj == highi ^ highj and
2874 popcount (lowi ^ lowj) == 1. */
2875 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2876 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2877 return false;
2878 if (!integer_pow2p (lowxor))
2879 return false;
2880 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2881 if (!tree_int_cst_equal (lowxor, highxor))
2882 return false;
2884 exp = rangei->exp;
2885 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2886 int prec = GET_MODE_PRECISION (mode);
2887 if (TYPE_PRECISION (type) < prec
2888 || (wi::to_wide (TYPE_MIN_VALUE (type))
2889 != wi::min_value (prec, TYPE_SIGN (type)))
2890 || (wi::to_wide (TYPE_MAX_VALUE (type))
2891 != wi::max_value (prec, TYPE_SIGN (type))))
2893 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2894 exp = fold_convert (type, exp);
2895 lowxor = fold_convert (type, lowxor);
2896 lowi = fold_convert (type, lowi);
2897 highi = fold_convert (type, highi);
2899 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2900 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2901 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2902 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2903 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2904 NULL, rangei->in_p, lowj, highj,
2905 rangei->strict_overflow_p
2906 || rangej->strict_overflow_p))
2907 return true;
2908 return false;
2911 /* Optimize X == CST1 || X == CST2
2912 if popcount (CST2 - CST1) == 1 into
2913 ((X - CST1) & ~(CST2 - CST1)) == 0.
2914 Similarly for ranges. E.g.
2915 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2916 || X == 75 || X == 45
2917 will be transformed by the previous optimization into
2918 (X - 43U) <= 3U || (X - 75U) <= 3U
2919 and this loop can transform that into
2920 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2921 static bool
2922 optimize_range_tests_diff (enum tree_code opcode, tree type,
2923 tree lowi, tree lowj, tree highi, tree highj,
2924 vec<operand_entry *> *ops,
2925 struct range_entry *rangei,
2926 struct range_entry *rangej)
2928 tree tem1, tem2, mask;
2929 /* Check highi - lowi == highj - lowj. */
2930 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2931 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2932 return false;
2933 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2934 if (!tree_int_cst_equal (tem1, tem2))
2935 return false;
2936 /* Check popcount (lowj - lowi) == 1. */
2937 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2938 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2939 return false;
2940 if (!integer_pow2p (tem1))
2941 return false;
2943 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2944 int prec = GET_MODE_PRECISION (mode);
2945 if (TYPE_PRECISION (type) < prec
2946 || (wi::to_wide (TYPE_MIN_VALUE (type))
2947 != wi::min_value (prec, TYPE_SIGN (type)))
2948 || (wi::to_wide (TYPE_MAX_VALUE (type))
2949 != wi::max_value (prec, TYPE_SIGN (type))))
2950 type = build_nonstandard_integer_type (prec, 1);
2951 else
2952 type = unsigned_type_for (type);
2953 tem1 = fold_convert (type, tem1);
2954 tem2 = fold_convert (type, tem2);
2955 lowi = fold_convert (type, lowi);
2956 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2957 tem1 = fold_build2 (MINUS_EXPR, type,
2958 fold_convert (type, rangei->exp), lowi);
2959 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2960 lowj = build_int_cst (type, 0);
2961 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2962 NULL, rangei->in_p, lowj, tem2,
2963 rangei->strict_overflow_p
2964 || rangej->strict_overflow_p))
2965 return true;
2966 return false;
2969 /* Perform the checks common to optimize_range_tests_xor and
2970 optimize_range_tests_diff.
2971 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2972 Else call optimize_range_tests_diff. */
2974 static bool
2975 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2976 bool optimize_xor, vec<operand_entry *> *ops,
2977 struct range_entry *ranges)
2979 int i, j;
2980 bool any_changes = false;
2981 for (i = first; i < length; i++)
2983 tree lowi, highi, lowj, highj, type, tem;
2985 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2986 continue;
2987 type = TREE_TYPE (ranges[i].exp);
2988 if (!INTEGRAL_TYPE_P (type))
2989 continue;
2990 lowi = ranges[i].low;
2991 if (lowi == NULL_TREE)
2992 lowi = TYPE_MIN_VALUE (type);
2993 highi = ranges[i].high;
2994 if (highi == NULL_TREE)
2995 continue;
2996 for (j = i + 1; j < length && j < i + 64; j++)
2998 bool changes;
2999 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3000 continue;
3001 lowj = ranges[j].low;
3002 if (lowj == NULL_TREE)
3003 continue;
3004 highj = ranges[j].high;
3005 if (highj == NULL_TREE)
3006 highj = TYPE_MAX_VALUE (type);
3007 /* Check lowj > highi. */
3008 tem = fold_binary (GT_EXPR, boolean_type_node,
3009 lowj, highi);
3010 if (tem == NULL_TREE || !integer_onep (tem))
3011 continue;
3012 if (optimize_xor)
3013 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3014 highi, highj, ops,
3015 ranges + i, ranges + j);
3016 else
3017 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3018 highi, highj, ops,
3019 ranges + i, ranges + j);
3020 if (changes)
3022 any_changes = true;
3023 break;
3027 return any_changes;
3030 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3031 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3032 EXP on success, NULL otherwise. */
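/* For illustration, with PREC 64: the range [43, 45] queried against
   TOTALLOW 43 yields a mask with bits 0-2 set; a later range [50, 51]
   against the same TOTALLOW yields bits 7-8, because its LOW is 7 above
   TOTALLOW and the low-order mask is shifted left by that amount.  */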
3034 static tree
3035 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3036 wide_int *mask, tree *totallowp)
3038 tree tem = int_const_binop (MINUS_EXPR, high, low);
3039 if (tem == NULL_TREE
3040 || TREE_CODE (tem) != INTEGER_CST
3041 || TREE_OVERFLOW (tem)
3042 || tree_int_cst_sgn (tem) == -1
3043 || compare_tree_int (tem, prec) != -1)
3044 return NULL_TREE;
3046 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3047 *mask = wi::shifted_mask (0, max, false, prec);
3048 if (TREE_CODE (exp) == BIT_AND_EXPR
3049 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3051 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3052 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3053 if (wi::popcount (msk) == 1
3054 && wi::ltu_p (msk, prec - max))
3056 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3057 max += msk.to_uhwi ();
3058 exp = TREE_OPERAND (exp, 0);
3059 if (integer_zerop (low)
3060 && TREE_CODE (exp) == PLUS_EXPR
3061 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3063 tree ret = TREE_OPERAND (exp, 0);
3064 STRIP_NOPS (ret);
3065 widest_int bias
3066 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3067 TYPE_PRECISION (TREE_TYPE (low))));
3068 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3069 if (totallowp)
3071 *totallowp = tbias;
3072 return ret;
3074 else if (!tree_int_cst_lt (totallow, tbias))
3075 return NULL_TREE;
3076 bias = wi::to_widest (tbias);
3077 bias -= wi::to_widest (totallow);
3078 if (bias >= 0 && bias < prec - max)
3080 *mask = wi::lshift (*mask, bias);
3081 return ret;
3086 if (totallowp)
3087 return exp;
3088 if (!tree_int_cst_lt (totallow, low))
3089 return exp;
3090 tem = int_const_binop (MINUS_EXPR, low, totallow);
3091 if (tem == NULL_TREE
3092 || TREE_CODE (tem) != INTEGER_CST
3093 || TREE_OVERFLOW (tem)
3094 || compare_tree_int (tem, prec - max) == 1)
3095 return NULL_TREE;
3097 *mask = wi::lshift (*mask, wi::to_widest (tem));
3098 return exp;
3101 /* Attempt to optimize small range tests using bit test.
3102 E.g.
3103 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3104 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3105 has been optimized by earlier optimizations into:
3106 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3107 As all the 43 through 82 range is less than 64 numbers,
3108 for 64-bit word targets optimize that into:
3109 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3111 static bool
3112 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3113 vec<operand_entry *> *ops,
3114 struct range_entry *ranges)
3116 int i, j;
3117 bool any_changes = false;
3118 int prec = GET_MODE_BITSIZE (word_mode);
3119 auto_vec<struct range_entry *, 64> candidates;
3121 for (i = first; i < length - 2; i++)
3123 tree lowi, highi, lowj, highj, type;
3125 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3126 continue;
3127 type = TREE_TYPE (ranges[i].exp);
3128 if (!INTEGRAL_TYPE_P (type))
3129 continue;
3130 lowi = ranges[i].low;
3131 if (lowi == NULL_TREE)
3132 lowi = TYPE_MIN_VALUE (type);
3133 highi = ranges[i].high;
3134 if (highi == NULL_TREE)
3135 continue;
3136 wide_int mask;
3137 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3138 highi, &mask, &lowi);
3139 if (exp == NULL_TREE)
3140 continue;
3141 bool strict_overflow_p = ranges[i].strict_overflow_p;
3142 candidates.truncate (0);
3143 int end = MIN (i + 64, length);
3144 for (j = i + 1; j < end; j++)
3146 tree exp2;
3147 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3148 continue;
3149 if (ranges[j].exp == exp)
3151 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3153 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3154 if (exp2 == exp)
3156 else if (TREE_CODE (exp2) == PLUS_EXPR)
3158 exp2 = TREE_OPERAND (exp2, 0);
3159 STRIP_NOPS (exp2);
3160 if (exp2 != exp)
3161 continue;
3163 else
3164 continue;
3166 else
3167 continue;
3168 lowj = ranges[j].low;
3169 if (lowj == NULL_TREE)
3170 continue;
3171 highj = ranges[j].high;
3172 if (highj == NULL_TREE)
3173 highj = TYPE_MAX_VALUE (type);
3174 wide_int mask2;
3175 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3176 highj, &mask2, NULL);
3177 if (exp2 != exp)
3178 continue;
3179 mask |= mask2;
3180 strict_overflow_p |= ranges[j].strict_overflow_p;
3181 candidates.safe_push (&ranges[j]);
3184 /* If we would otherwise need 3 or more comparisons, use a bit test. */
3185 if (candidates.length () >= 2)
3187 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3188 wi::to_widest (lowi)
3189 + prec - 1 - wi::clz (mask));
3190 operand_entry *oe = (*ops)[ranges[i].idx];
3191 tree op = oe->op;
3192 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3193 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3194 location_t loc = gimple_location (stmt);
3195 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3197 /* See if it isn't cheaper to pretend the minimum value of the
3198 range is 0, if the maximum value is small enough.
3199 We can then avoid the subtraction of the minimum value, but the
3200 mask constant might be more expensive. */
3201 if (compare_tree_int (lowi, 0) > 0
3202 && compare_tree_int (high, prec) < 0)
3204 int cost_diff;
3205 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3206 rtx reg = gen_raw_REG (word_mode, 10000);
3207 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3208 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3209 GEN_INT (-m)), speed_p);
3210 rtx r = immed_wide_int_const (mask, word_mode);
3211 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3212 word_mode, speed_p);
3213 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3214 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3215 word_mode, speed_p);
3216 if (cost_diff > 0)
3218 mask = wi::lshift (mask, m);
3219 lowi = build_zero_cst (TREE_TYPE (lowi));
3223 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3224 false, lowi, high);
3225 if (tem == NULL_TREE || is_gimple_val (tem))
3226 continue;
3227 tree etype = unsigned_type_for (TREE_TYPE (exp));
3228 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3229 fold_convert_loc (loc, etype, exp),
3230 fold_convert_loc (loc, etype, lowi));
3231 exp = fold_convert_loc (loc, integer_type_node, exp);
3232 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3233 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3234 build_int_cst (word_type, 1), exp);
3235 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3236 wide_int_to_tree (word_type, mask));
3237 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3238 build_zero_cst (word_type));
3239 if (is_gimple_val (exp))
3240 continue;
3242 /* The shift might have undefined behavior if TEM is true,
3243 but reassociate_bb isn't prepared to have basic blocks
3244 split when it is running. So, temporarily emit code
3245 with BIT_IOR_EXPR instead of &&, and fix it up in
3246 branch_fixup. */
3247 gimple_seq seq;
3248 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3249 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3250 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3251 gimple_seq seq2;
3252 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3253 gimple_seq_add_seq_without_update (&seq, seq2);
3254 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3255 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3256 gimple *g = gimple_build_assign (make_ssa_name (optype),
3257 BIT_IOR_EXPR, tem, exp);
3258 gimple_set_location (g, loc);
3259 gimple_seq_add_stmt_without_update (&seq, g);
3260 exp = gimple_assign_lhs (g);
3261 tree val = build_zero_cst (optype);
3262 if (update_range_test (&ranges[i], NULL, candidates.address (),
3263 candidates.length (), opcode, ops, exp,
3264 seq, false, val, val, strict_overflow_p))
3266 any_changes = true;
3267 reassoc_branch_fixups.safe_push (tem);
3269 else
3270 gimple_seq_discard (seq);
3273 return any_changes;
3276 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3277 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. */
3279 static bool
3280 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3281 vec<operand_entry *> *ops,
3282 struct range_entry *ranges)
3284 int i;
3285 unsigned int b;
3286 bool any_changes = false;
3287 auto_vec<int, 128> buckets;
3288 auto_vec<int, 32> chains;
3289 auto_vec<struct range_entry *, 32> candidates;
3291 for (i = first; i < length; i++)
3293 if (ranges[i].exp == NULL_TREE
3294 || TREE_CODE (ranges[i].exp) != SSA_NAME
3295 || !ranges[i].in_p
3296 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3297 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3298 || ranges[i].low == NULL_TREE
3299 || ranges[i].low != ranges[i].high)
3300 continue;
3302 bool zero_p = integer_zerop (ranges[i].low);
3303 if (!zero_p && !integer_all_onesp (ranges[i].low))
3304 continue;
3306 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3307 if (buckets.length () <= b)
3308 buckets.safe_grow_cleared (b + 1);
3309 if (chains.length () <= (unsigned) i)
3310 chains.safe_grow (i + 1);
3311 chains[i] = buckets[b];
3312 buckets[b] = i + 1;
3315 FOR_EACH_VEC_ELT (buckets, b, i)
3316 if (i && chains[i - 1])
3318 int j, k = i;
3319 for (j = chains[i - 1]; j; j = chains[j - 1])
3321 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3322 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3323 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3324 k = j;
3326 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3327 tree type2 = NULL_TREE;
3328 bool strict_overflow_p = false;
3329 candidates.truncate (0);
3330 for (j = i; j; j = chains[j - 1])
3332 tree type = TREE_TYPE (ranges[j - 1].exp);
3333 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3334 if (j == k
3335 || useless_type_conversion_p (type1, type))
3337 else if (type2 == NULL_TREE
3338 || useless_type_conversion_p (type2, type))
3340 if (type2 == NULL_TREE)
3341 type2 = type;
3342 candidates.safe_push (&ranges[j - 1]);
3345 unsigned l = candidates.length ();
3346 for (j = i; j; j = chains[j - 1])
3348 tree type = TREE_TYPE (ranges[j - 1].exp);
3349 if (j == k)
3350 continue;
3351 if (useless_type_conversion_p (type1, type))
3353 else if (type2 == NULL_TREE
3354 || useless_type_conversion_p (type2, type))
3355 continue;
3356 candidates.safe_push (&ranges[j - 1]);
3358 gimple_seq seq = NULL;
3359 tree op = NULL_TREE;
3360 unsigned int id;
3361 struct range_entry *r;
3362 candidates.safe_push (&ranges[k - 1]);
3363 FOR_EACH_VEC_ELT (candidates, id, r)
3365 gimple *g;
3366 if (id == 0)
3368 op = r->exp;
3369 continue;
3371 if (id == l)
3373 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3374 gimple_seq_add_stmt_without_update (&seq, g);
3375 op = gimple_assign_lhs (g);
3377 tree type = TREE_TYPE (r->exp);
3378 tree exp = r->exp;
3379 if (id >= l && !useless_type_conversion_p (type1, type))
3381 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3382 gimple_seq_add_stmt_without_update (&seq, g);
3383 exp = gimple_assign_lhs (g);
3385 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3386 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3387 op, exp);
3388 gimple_seq_add_stmt_without_update (&seq, g);
3389 op = gimple_assign_lhs (g);
3391 candidates.pop ();
3392 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3393 candidates.length (), opcode, ops, op,
3394 seq, true, ranges[k - 1].low,
3395 ranges[k - 1].low, strict_overflow_p))
3396 any_changes = true;
3397 else
3398 gimple_seq_discard (seq);
3401 return any_changes;
3404 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3405 a >= 0 && a < b into (unsigned) a < (unsigned) b
3406 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
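/* Why this is safe (informal argument): if a < 0, then (unsigned) a is
   at least 2^(precision-1) while (unsigned) b is below 2^(precision-1)
   because b >= 0, so the unsigned comparison is false, matching
   a >= 0 && a < b.  If a >= 0, both operands are unchanged by the cast
   and the comparisons agree trivially.  */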
3408 static bool
3409 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3410 vec<operand_entry *> *ops,
3411 struct range_entry *ranges,
3412 basic_block first_bb)
3414 int i;
3415 bool any_changes = false;
3416 hash_map<tree, int> *map = NULL;
3418 for (i = first; i < length; i++)
3420 if (ranges[i].exp == NULL_TREE
3421 || TREE_CODE (ranges[i].exp) != SSA_NAME
3422 || !ranges[i].in_p)
3423 continue;
3425 tree type = TREE_TYPE (ranges[i].exp);
3426 if (!INTEGRAL_TYPE_P (type)
3427 || TYPE_UNSIGNED (type)
3428 || ranges[i].low == NULL_TREE
3429 || !integer_zerop (ranges[i].low)
3430 || ranges[i].high != NULL_TREE)
3431 continue;
3432 /* EXP >= 0 here. */
3433 if (map == NULL)
3434 map = new hash_map <tree, int>;
3435 map->put (ranges[i].exp, i);
3438 if (map == NULL)
3439 return false;
3441 for (i = 0; i < length; i++)
3443 bool in_p = ranges[i].in_p;
3444 if (ranges[i].low == NULL_TREE
3445 || ranges[i].high == NULL_TREE)
3446 continue;
3447 if (!integer_zerop (ranges[i].low)
3448 || !integer_zerop (ranges[i].high))
3450 if (ranges[i].exp
3451 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3452 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3453 && integer_onep (ranges[i].low)
3454 && integer_onep (ranges[i].high))
3455 in_p = !in_p;
3456 else
3457 continue;
3460 gimple *stmt;
3461 tree_code ccode;
3462 tree rhs1, rhs2;
3463 if (ranges[i].exp)
3465 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3466 continue;
3467 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3468 if (!is_gimple_assign (stmt))
3469 continue;
3470 ccode = gimple_assign_rhs_code (stmt);
3471 rhs1 = gimple_assign_rhs1 (stmt);
3472 rhs2 = gimple_assign_rhs2 (stmt);
3474 else
3476 operand_entry *oe = (*ops)[ranges[i].idx];
3477 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3478 if (gimple_code (stmt) != GIMPLE_COND)
3479 continue;
3480 ccode = gimple_cond_code (stmt);
3481 rhs1 = gimple_cond_lhs (stmt);
3482 rhs2 = gimple_cond_rhs (stmt);
3485 if (TREE_CODE (rhs1) != SSA_NAME
3486 || rhs2 == NULL_TREE
3487 || TREE_CODE (rhs2) != SSA_NAME)
3488 continue;
3490 switch (ccode)
3492 case GT_EXPR:
3493 case GE_EXPR:
3494 case LT_EXPR:
3495 case LE_EXPR:
3496 break;
3497 default:
3498 continue;
3500 if (in_p)
3501 ccode = invert_tree_comparison (ccode, false);
3502 switch (ccode)
3504 case GT_EXPR:
3505 case GE_EXPR:
3506 std::swap (rhs1, rhs2);
3507 ccode = swap_tree_comparison (ccode);
3508 break;
3509 case LT_EXPR:
3510 case LE_EXPR:
3511 break;
3512 default:
3513 gcc_unreachable ();
3516 int *idx = map->get (rhs1);
3517 if (idx == NULL)
3518 continue;
3520 /* maybe_optimize_range_tests allows statements without side-effects
3521 in the basic blocks as long as they are consumed in the same bb.
3522 Make sure rhs2's def stmt is not among them, otherwise we can't
3523 safely use get_nonzero_bits on it. E.g. in:
3524 # RANGE [-83, 1] NONZERO 173
3525 # k_32 = PHI <k_47(13), k_12(9)>
3527 if (k_32 >= 0)
3528 goto <bb 5>; [26.46%]
3529 else
3530 goto <bb 9>; [73.54%]
3532 <bb 5> [local count: 140323371]:
3533 # RANGE [0, 1] NONZERO 1
3534 _5 = (int) k_32;
3535 # RANGE [0, 4] NONZERO 4
3536 _21 = _5 << 2;
3537 # RANGE [0, 4] NONZERO 4
3538 iftmp.0_44 = (char) _21;
3539 if (k_32 < iftmp.0_44)
3540 goto <bb 6>; [84.48%]
3541 else
3542 goto <bb 9>; [15.52%]
3543 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3544 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3545 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3546 those stmts even for negative k_32 and the value ranges would be no
3547 longer guaranteed and so the optimization would be invalid. */
3548 while (opcode == ERROR_MARK)
3550 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3551 basic_block bb2 = gimple_bb (g);
3552 if (bb2
3553 && bb2 != first_bb
3554 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3556 /* As an exception, handle a few common cases. */
3557 if (gimple_assign_cast_p (g)
3558 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3560 tree op0 = gimple_assign_rhs1 (g);
3561 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3562 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3563 > TYPE_PRECISION (TREE_TYPE (op0))))
3564 /* Zero-extension is always ok. */
3565 break;
3566 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3567 == TYPE_PRECISION (TREE_TYPE (op0))
3568 && TREE_CODE (op0) == SSA_NAME)
3570 /* Cast from signed to unsigned or vice versa. Retry
3571 with the op0 as new rhs2. */
3572 rhs2 = op0;
3573 continue;
3576 else if (is_gimple_assign (g)
3577 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3578 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3579 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3580 /* Masking with INTEGER_CST with MSB clear is always ok
3581 too. */
3582 break;
3583 rhs2 = NULL_TREE;
3585 break;
3587 if (rhs2 == NULL_TREE)
3588 continue;
3590 wide_int nz = get_nonzero_bits (rhs2);
3591 if (wi::neg_p (nz))
3592 continue;
3594 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3595 and RHS2 is known to be RHS2 >= 0. */
3596 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3598 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3599 if ((ranges[*idx].strict_overflow_p
3600 || ranges[i].strict_overflow_p)
3601 && issue_strict_overflow_warning (wc))
3602 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3603 "assuming signed overflow does not occur "
3604 "when simplifying range test");
3606 if (dump_file && (dump_flags & TDF_DETAILS))
3608 struct range_entry *r = &ranges[*idx];
3609 fprintf (dump_file, "Optimizing range test ");
3610 print_generic_expr (dump_file, r->exp);
3611 fprintf (dump_file, " +[");
3612 print_generic_expr (dump_file, r->low);
3613 fprintf (dump_file, ", ");
3614 print_generic_expr (dump_file, r->high);
3615 fprintf (dump_file, "] and comparison ");
3616 print_generic_expr (dump_file, rhs1);
3617 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3618 print_generic_expr (dump_file, rhs2);
3619 fprintf (dump_file, "\n into (");
3620 print_generic_expr (dump_file, utype);
3621 fprintf (dump_file, ") ");
3622 print_generic_expr (dump_file, rhs1);
3623 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3624 print_generic_expr (dump_file, utype);
3625 fprintf (dump_file, ") ");
3626 print_generic_expr (dump_file, rhs2);
3627 fprintf (dump_file, "\n");
3630 operand_entry *oe = (*ops)[ranges[i].idx];
3631 ranges[i].in_p = 0;
3632 if (opcode == BIT_IOR_EXPR
3633 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3635 ranges[i].in_p = 1;
3636 ccode = invert_tree_comparison (ccode, false);
3639 unsigned int uid = gimple_uid (stmt);
3640 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3641 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3642 gimple_set_uid (g, uid);
3643 rhs1 = gimple_assign_lhs (g);
3644 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3645 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3647 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3648 gimple_set_uid (g, uid);
3649 rhs2 = gimple_assign_lhs (g);
3650 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3652 if (tree_swap_operands_p (rhs1, rhs2))
3654 std::swap (rhs1, rhs2);
3655 ccode = swap_tree_comparison (ccode);
3657 if (gimple_code (stmt) == GIMPLE_COND)
3659 gcond *c = as_a <gcond *> (stmt);
3660 gimple_cond_set_code (c, ccode);
3661 gimple_cond_set_lhs (c, rhs1);
3662 gimple_cond_set_rhs (c, rhs2);
3663 update_stmt (stmt);
3665 else
3667 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3668 if (!INTEGRAL_TYPE_P (ctype)
3669 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3670 && TYPE_PRECISION (ctype) != 1))
3671 ctype = boolean_type_node;
3672 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3673 gimple_set_uid (g, uid);
3674 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3675 if (oe->op && ctype != TREE_TYPE (oe->op))
3677 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3678 NOP_EXPR, gimple_assign_lhs (g));
3679 gimple_set_uid (g, uid);
3680 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3682 ranges[i].exp = gimple_assign_lhs (g);
3683 oe->op = ranges[i].exp;
3684 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3685 ranges[i].high = ranges[i].low;
3687 ranges[i].strict_overflow_p = false;
3688 oe = (*ops)[ranges[*idx].idx];
3689 /* Now change all the other range test immediate uses, so that
3690 those tests will be optimized away. */
3691 if (opcode == ERROR_MARK)
3693 if (oe->op)
3694 oe->op = build_int_cst (TREE_TYPE (oe->op),
3695 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3696 else
3697 oe->op = (oe->rank == BIT_IOR_EXPR
3698 ? boolean_false_node : boolean_true_node);
3700 else
3701 oe->op = error_mark_node;
3702 ranges[*idx].exp = NULL_TREE;
3703 ranges[*idx].low = NULL_TREE;
3704 ranges[*idx].high = NULL_TREE;
3705 any_changes = true;
3708 delete map;
3709 return any_changes;
3712 /* Optimize range tests, similarly to how fold_range_test optimizes
3713 them on trees. The tree code for the binary
3714 operation between all the operands is OPCODE.
3715 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3716 maybe_optimize_range_tests for inter-bb range optimization.
3717 In that case if oe->op is NULL, oe->id is bb->index whose
3718 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3719 the actual opcode.
3720 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3722 static bool
3723 optimize_range_tests (enum tree_code opcode,
3724 vec<operand_entry *> *ops, basic_block first_bb)
3726 unsigned int length = ops->length (), i, j, first;
3727 operand_entry *oe;
3728 struct range_entry *ranges;
3729 bool any_changes = false;
3731 if (length == 1)
3732 return false;
3734 ranges = XNEWVEC (struct range_entry, length);
3735 for (i = 0; i < length; i++)
3737 oe = (*ops)[i];
3738 ranges[i].idx = i;
3739 init_range_entry (ranges + i, oe->op,
3740 oe->op
3741 ? NULL
3742 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3743 /* For |, invert it now; we will invert it again before emitting
3744 the optimized expression. */
3745 if (opcode == BIT_IOR_EXPR
3746 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3747 ranges[i].in_p = !ranges[i].in_p;
3750 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3751 for (i = 0; i < length; i++)
3752 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3753 break;
3755 /* Try to merge ranges. */
3756 for (first = i; i < length; i++)
3758 tree low = ranges[i].low;
3759 tree high = ranges[i].high;
3760 int in_p = ranges[i].in_p;
3761 bool strict_overflow_p = ranges[i].strict_overflow_p;
3762 int update_fail_count = 0;
3764 for (j = i + 1; j < length; j++)
3766 if (ranges[i].exp != ranges[j].exp)
3767 break;
3768 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3769 ranges[j].in_p, ranges[j].low, ranges[j].high))
3770 break;
3771 strict_overflow_p |= ranges[j].strict_overflow_p;
3774 if (j == i + 1)
3775 continue;
3777 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3778 opcode, ops, ranges[i].exp, NULL, in_p,
3779 low, high, strict_overflow_p))
3781 i = j - 1;
3782 any_changes = true;
3784 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3785 while update_range_test would fail. */
3786 else if (update_fail_count == 64)
3787 i = j - 1;
3788 else
3789 ++update_fail_count;
3792 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3793 ops, ranges);
3795 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3796 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3797 ops, ranges);
3798 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3799 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3800 ops, ranges);
3801 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3802 ops, ranges);
3803 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3804 ranges, first_bb);
3806 if (any_changes && opcode != ERROR_MARK)
3808 j = 0;
3809 FOR_EACH_VEC_ELT (*ops, i, oe)
3811 if (oe->op == error_mark_node)
3812 continue;
3813 else if (i != j)
3814 (*ops)[j] = oe;
3815 j++;
3817 ops->truncate (j);
3820 XDELETEVEC (ranges);
3821 return any_changes;
3824 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3825 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3826 otherwise the comparison code. TYPE is a return value that is set
3827 to the type of the comparison. */
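/* For illustration (hypothetical SSA names): if VAR is defined by
     VAR = VEC_COND_EXPR <a_1 < b_2, { -1, ... }, { 0, ... }>;
   the function returns LT_EXPR, stores the defining statement in *RETS,
   sets *RETI to false and *TYPE to the type of a_1 < b_2.  */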
3829 static tree_code
3830 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3832 if (TREE_CODE (var) != SSA_NAME)
3833 return ERROR_MARK;
3835 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3836 if (stmt == NULL)
3837 return ERROR_MARK;
3839 /* ??? If we start creating more COND_EXPR, we could perform
3840 this same optimization with them. For now, simplify. */
3841 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3842 return ERROR_MARK;
3844 tree cond = gimple_assign_rhs1 (stmt);
3845 tree_code cmp = TREE_CODE (cond);
3846 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3847 return ERROR_MARK;
3849 /* ??? For now, allow only canonical true and false result vectors.
3850 We could expand this to other constants should the need arise,
3851 but at the moment we don't create them. */
3852 tree t = gimple_assign_rhs2 (stmt);
3853 tree f = gimple_assign_rhs3 (stmt);
3854 bool inv;
3855 if (integer_all_onesp (t))
3856 inv = false;
3857 else if (integer_all_onesp (f))
3859 cmp = invert_tree_comparison (cmp, false);
3860 inv = true;
3862 else
3863 return ERROR_MARK;
3864 if (!integer_zerop (f))
3865 return ERROR_MARK;
3867 /* Success! */
3868 if (rets)
3869 *rets = stmt;
3870 if (reti)
3871 *reti = inv;
3872 if (type)
3873 *type = TREE_TYPE (cond);
3874 return cmp;
3877 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3878 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
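/* For illustration (hypothetical SSA names): given, under BIT_IOR_EXPR,
     _3 = VEC_COND_EXPR <x_1 < y_2, { -1, ... }, { 0, ... }>;
     _4 = VEC_COND_EXPR <x_1 == y_2, { -1, ... }, { 0, ... }>;
   maybe_fold_or_comparisons combines the conditions into x_1 <= y_2,
   the first VEC_COND_EXPR is rewritten to use it, and the second
   operand entry is marked with error_mark_node so it is dropped.  */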
3880 static bool
3881 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3883 unsigned int length = ops->length (), i, j;
3884 bool any_changes = false;
3886 if (length == 1)
3887 return false;
3889 for (i = 0; i < length; ++i)
3891 tree elt0 = (*ops)[i]->op;
3893 gassign *stmt0;
3894 bool invert;
3895 tree type;
3896 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3897 if (cmp0 == ERROR_MARK)
3898 continue;
3900 for (j = i + 1; j < length; ++j)
3902 tree &elt1 = (*ops)[j]->op;
3904 gassign *stmt1;
3905 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3906 if (cmp1 == ERROR_MARK)
3907 continue;
3909 tree cond0 = gimple_assign_rhs1 (stmt0);
3910 tree x0 = TREE_OPERAND (cond0, 0);
3911 tree y0 = TREE_OPERAND (cond0, 1);
3913 tree cond1 = gimple_assign_rhs1 (stmt1);
3914 tree x1 = TREE_OPERAND (cond1, 0);
3915 tree y1 = TREE_OPERAND (cond1, 1);
3917 tree comb;
3918 if (opcode == BIT_AND_EXPR)
3919 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3920 y1);
3921 else if (opcode == BIT_IOR_EXPR)
3922 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3923 y1);
3924 else
3925 gcc_unreachable ();
3926 if (comb == NULL)
3927 continue;
3929 /* Success! */
3930 if (dump_file && (dump_flags & TDF_DETAILS))
3932 fprintf (dump_file, "Transforming ");
3933 print_generic_expr (dump_file, cond0);
3934 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3935 print_generic_expr (dump_file, cond1);
3936 fprintf (dump_file, " into ");
3937 print_generic_expr (dump_file, comb);
3938 fputc ('\n', dump_file);
3941 gimple_assign_set_rhs1 (stmt0, comb);
3942 if (invert)
3943 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3944 *gimple_assign_rhs3_ptr (stmt0));
3945 update_stmt (stmt0);
3947 elt1 = error_mark_node;
3948 any_changes = true;
3952 if (any_changes)
3954 operand_entry *oe;
3955 j = 0;
3956 FOR_EACH_VEC_ELT (*ops, i, oe)
3958 if (oe->op == error_mark_node)
3959 continue;
3960 else if (i != j)
3961 (*ops)[j] = oe;
3962 j++;
3964 ops->truncate (j);
3967 return any_changes;
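/* A hypothetical example of the transformation above (names assumed,
   not from the sources): given two selects combined with BIT_AND_EXPR,

     _1 = VEC_COND_EXPR <x_2 < y_3, { -1, ... }, { 0, ... }>;
     _2 = VEC_COND_EXPR <x_2 <= y_3, { -1, ... }, { 0, ... }>;
     _4 = _1 & _2;

   maybe_fold_and_comparisons can combine x_2 < y_3 with x_2 <= y_3
   into the single comparison x_2 < y_3, which is written back as the
   condition of the first VEC_COND_EXPR; the second operand entry is
   then marked error_mark_node and compacted away.  */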
3970 /* Return true if STMT is a cast like:
3971 <bb N>:
3973 _123 = (int) _234;
3975 <bb M>:
3976 # _345 = PHI <_123(N), 1(...), 1(...)>
3977 where _234 has bool type, _123 has a single use and
3978 bb N has a single successor M. This is commonly used in
3979 the last block of a range test.
3981 Also return true if STMT is a tcc_comparison like:
3982 <bb N>:
3984 _234 = a_2(D) == 2;
3986 <bb M>:
3987 # _345 = PHI <_234(N), 1(...), 1(...)>
3988 _346 = (int) _345;
3989 where _234 has bool type, a single use and
3990 bb N has a single successor M. This is commonly used in
3991 the last block of a range test. */
3993 static bool
3994 final_range_test_p (gimple *stmt)
3996 basic_block bb, rhs_bb, lhs_bb;
3997 edge e;
3998 tree lhs, rhs;
3999 use_operand_p use_p;
4000 gimple *use_stmt;
4002 if (!gimple_assign_cast_p (stmt)
4003 && (!is_gimple_assign (stmt)
4004 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4005 != tcc_comparison)))
4006 return false;
4007 bb = gimple_bb (stmt);
4008 if (!single_succ_p (bb))
4009 return false;
4010 e = single_succ_edge (bb);
4011 if (e->flags & EDGE_COMPLEX)
4012 return false;
4014 lhs = gimple_assign_lhs (stmt);
4015 rhs = gimple_assign_rhs1 (stmt);
4016 if (gimple_assign_cast_p (stmt)
4017 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4018 || TREE_CODE (rhs) != SSA_NAME
4019 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4020 return false;
4022 if (!gimple_assign_cast_p (stmt)
4023 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4024 return false;
4026 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4027 if (!single_imm_use (lhs, &use_p, &use_stmt))
4028 return false;
4030 if (gimple_code (use_stmt) != GIMPLE_PHI
4031 || gimple_bb (use_stmt) != e->dest)
4032 return false;
4034 /* And that the rhs is defined in the same loop. */
4035 if (gimple_assign_cast_p (stmt))
4037 if (TREE_CODE (rhs) != SSA_NAME
4038 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4039 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4040 return false;
4042 else
4044 if (TREE_CODE (lhs) != SSA_NAME
4045 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4046 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4047 return false;
4050 return true;
4053 /* Return true if BB is a suitable basic block for inter-bb range test
4054 optimization. If BACKWARD is true, BB should be the only predecessor
4055 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4056 or compared with to find a common basic block to which all conditions
4057 branch if true resp. false. If BACKWARD is false, TEST_BB should
4058 be the only predecessor of BB. */
4060 static bool
4061 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4062 bool backward)
4064 edge_iterator ei, ei2;
4065 edge e, e2;
4066 gimple *stmt;
4067 gphi_iterator gsi;
4068 bool other_edge_seen = false;
4069 bool is_cond;
4071 if (test_bb == bb)
4072 return false;
4073 /* Check last stmt first. */
4074 stmt = last_stmt (bb);
4075 if (stmt == NULL
4076 || (gimple_code (stmt) != GIMPLE_COND
4077 && (backward || !final_range_test_p (stmt)))
4078 || gimple_visited_p (stmt)
4079 || stmt_could_throw_p (cfun, stmt)
4080 || *other_bb == bb)
4081 return false;
4082 is_cond = gimple_code (stmt) == GIMPLE_COND;
4083 if (is_cond)
4085 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4086 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4087 to *OTHER_BB (if not set yet, try to find it out). */
4088 if (EDGE_COUNT (bb->succs) != 2)
4089 return false;
4090 FOR_EACH_EDGE (e, ei, bb->succs)
4092 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4093 return false;
4094 if (e->dest == test_bb)
4096 if (backward)
4097 continue;
4098 else
4099 return false;
4101 if (e->dest == bb)
4102 return false;
4103 if (*other_bb == NULL)
4105 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4106 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4107 return false;
4108 else if (e->dest == e2->dest)
4109 *other_bb = e->dest;
4110 if (*other_bb == NULL)
4111 return false;
4113 if (e->dest == *other_bb)
4114 other_edge_seen = true;
4115 else if (backward)
4116 return false;
4118 if (*other_bb == NULL || !other_edge_seen)
4119 return false;
4121 else if (single_succ (bb) != *other_bb)
4122 return false;
4124 /* Now check all PHIs of *OTHER_BB. */
4125 e = find_edge (bb, *other_bb);
4126 e2 = find_edge (test_bb, *other_bb);
4127 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4129 gphi *phi = gsi.phi ();
4130 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4131 corresponding to BB and TEST_BB predecessor must be the same. */
4132 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4133 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4135 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4136 one of the PHIs should have the lhs of the last stmt in
4137 that block as PHI arg and that PHI should have 0 or 1
4138 corresponding to it in all other range test basic blocks
4139 considered. */
4140 if (!is_cond)
4142 if (gimple_phi_arg_def (phi, e->dest_idx)
4143 == gimple_assign_lhs (stmt)
4144 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4145 || integer_onep (gimple_phi_arg_def (phi,
4146 e2->dest_idx))))
4147 continue;
4149 else
4151 gimple *test_last = last_stmt (test_bb);
4152 if (gimple_code (test_last) != GIMPLE_COND
4153 && gimple_phi_arg_def (phi, e2->dest_idx)
4154 == gimple_assign_lhs (test_last)
4155 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4156 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4157 continue;
4160 return false;
4163 return true;
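/* An assumed example of the CFG shape this accepts (BACKWARD case):
   for code like

     if (a_1 == 4 || a_1 == 6 || a_1 == 8)
       foo ();

   the blocks look roughly like

     <bb 2>: if (a_1 == 4) goto <bb 5>; else goto <bb 3>;
     <bb 3>: if (a_1 == 6) goto <bb 5>; else goto <bb 4>;
     <bb 4>: if (a_1 == 8) goto <bb 5>; else goto <bb 6>;

   With TEST_BB = <bb 3> and BB = <bb 2>, BB is suitable: one of its
   edges goes to TEST_BB, the other to the common *OTHER_BB <bb 5>,
   and any PHIs in <bb 5> have equal arguments on both edges.  */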
4166 /* Return true if BB doesn't have side-effects that would disallow
4167 range test optimization, all SSA_NAMEs set in the bb are consumed
4168 in the bb and there are no PHIs. */
4170 static bool
4171 no_side_effect_bb (basic_block bb)
4173 gimple_stmt_iterator gsi;
4174 gimple *last;
4176 if (!gimple_seq_empty_p (phi_nodes (bb)))
4177 return false;
4178 last = last_stmt (bb);
4179 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4181 gimple *stmt = gsi_stmt (gsi);
4182 tree lhs;
4183 imm_use_iterator imm_iter;
4184 use_operand_p use_p;
4186 if (is_gimple_debug (stmt))
4187 continue;
4188 if (gimple_has_side_effects (stmt))
4189 return false;
4190 if (stmt == last)
4191 return true;
4192 if (!is_gimple_assign (stmt))
4193 return false;
4194 lhs = gimple_assign_lhs (stmt);
4195 if (TREE_CODE (lhs) != SSA_NAME)
4196 return false;
4197 if (gimple_assign_rhs_could_trap_p (stmt))
4198 return false;
4199 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4201 gimple *use_stmt = USE_STMT (use_p);
4202 if (is_gimple_debug (use_stmt))
4203 continue;
4204 if (gimple_bb (use_stmt) != bb)
4205 return false;
4208 return false;
4211 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4212 return true and fill in *OPS recursively. */
4214 static bool
4215 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4216 class loop *loop)
4218 gimple *stmt = SSA_NAME_DEF_STMT (var);
4219 tree rhs[2];
4220 int i;
4222 if (!is_reassociable_op (stmt, code, loop))
4223 return false;
4225 rhs[0] = gimple_assign_rhs1 (stmt);
4226 rhs[1] = gimple_assign_rhs2 (stmt);
4227 gimple_set_visited (stmt, true);
4228 for (i = 0; i < 2; i++)
4229 if (TREE_CODE (rhs[i]) == SSA_NAME
4230 && !get_ops (rhs[i], code, ops, loop)
4231 && has_single_use (rhs[i]))
4233 operand_entry *oe = operand_entry_pool.allocate ();
4235 oe->op = rhs[i];
4236 oe->rank = code;
4237 oe->id = 0;
4238 oe->count = 1;
4239 oe->stmt_to_insert = NULL;
4240 ops->safe_push (oe);
4242 return true;
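/* A small assumed example: for the single-use chain

     _5 = a_2 | b_3;
     _6 = _5 | c_4;

   get_ops (_6, BIT_IOR_EXPR, &ops, loop) marks both defining
   statements visited and pushes leaf entries for a_2, b_3 and c_4,
   so the whole disjunction is handed to the range-test code as one
   flat operand list.  */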
4245 /* Find the ops that were added by get_ops starting from VAR, see if
4246 they were changed during update_range_test and if yes, create new
4247 stmts. */
4249 static tree
4250 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4251 unsigned int *pidx, class loop *loop)
4253 gimple *stmt = SSA_NAME_DEF_STMT (var);
4254 tree rhs[4];
4255 int i;
4257 if (!is_reassociable_op (stmt, code, loop))
4258 return NULL;
4260 rhs[0] = gimple_assign_rhs1 (stmt);
4261 rhs[1] = gimple_assign_rhs2 (stmt);
4262 rhs[2] = rhs[0];
4263 rhs[3] = rhs[1];
4264 for (i = 0; i < 2; i++)
4265 if (TREE_CODE (rhs[i]) == SSA_NAME)
4267 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4268 if (rhs[2 + i] == NULL_TREE)
4270 if (has_single_use (rhs[i]))
4271 rhs[2 + i] = ops[(*pidx)++]->op;
4272 else
4273 rhs[2 + i] = rhs[i];
4276 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4277 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4279 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4280 var = make_ssa_name (TREE_TYPE (var));
4281 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4282 rhs[2], rhs[3]);
4283 gimple_set_uid (g, gimple_uid (stmt));
4284 gimple_set_visited (g, true);
4285 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4287 return var;
4290 /* Structure to track the initial value passed to get_ops and
4291 the range in the ops vector for each basic block. */
4293 struct inter_bb_range_test_entry
4295 tree op;
4296 unsigned int first_idx, last_idx;
4299 /* Inter-bb range test optimization.
4301 Returns TRUE if a gimple conditional is optimized to a true/false,
4302 otherwise returns FALSE.
4304 This indicates to the caller that it should run a CFG cleanup pass
4305 once reassociation is completed. */
4307 static bool
4308 maybe_optimize_range_tests (gimple *stmt)
4310 basic_block first_bb = gimple_bb (stmt);
4311 basic_block last_bb = first_bb;
4312 basic_block other_bb = NULL;
4313 basic_block bb;
4314 edge_iterator ei;
4315 edge e;
4316 auto_vec<operand_entry *> ops;
4317 auto_vec<inter_bb_range_test_entry> bbinfo;
4318 bool any_changes = false;
4319 bool cfg_cleanup_needed = false;
4321 /* Consider only basic blocks that end with GIMPLE_COND or
4322 a cast statement satisfying final_range_test_p. All
4323 but the last bb in the first_bb .. last_bb range
4324 should end with GIMPLE_COND. */
4325 if (gimple_code (stmt) == GIMPLE_COND)
4327 if (EDGE_COUNT (first_bb->succs) != 2)
4328 return cfg_cleanup_needed;
4330 else if (final_range_test_p (stmt))
4331 other_bb = single_succ (first_bb);
4332 else
4333 return cfg_cleanup_needed;
4335 if (stmt_could_throw_p (cfun, stmt))
4336 return cfg_cleanup_needed;
4338 /* As relative ordering of post-dominator sons isn't fixed,
4339 maybe_optimize_range_tests can be called first on any
4340 bb in the range we want to optimize. So, start searching
4341 backwards, if first_bb can be set to a predecessor. */
4342 while (single_pred_p (first_bb))
4344 basic_block pred_bb = single_pred (first_bb);
4345 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4346 break;
4347 if (!no_side_effect_bb (first_bb))
4348 break;
4349 first_bb = pred_bb;
4351 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4352 Before starting forward search in last_bb successors, find
4353 out the other_bb. */
4354 if (first_bb == last_bb)
4356 other_bb = NULL;
4357 /* As non-GIMPLE_COND last stmt always terminates the range,
4358 if forward search didn't discover anything, just give up. */
4359 if (gimple_code (stmt) != GIMPLE_COND)
4360 return cfg_cleanup_needed;
4361 /* Look at both successors. Either it ends with a GIMPLE_COND
4362 and satisfies suitable_cond_bb, or ends with a cast and
4363 other_bb is that cast's successor. */
4364 FOR_EACH_EDGE (e, ei, first_bb->succs)
4365 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4366 || e->dest == first_bb)
4367 return cfg_cleanup_needed;
4368 else if (single_pred_p (e->dest))
4370 stmt = last_stmt (e->dest);
4371 if (stmt
4372 && gimple_code (stmt) == GIMPLE_COND
4373 && EDGE_COUNT (e->dest->succs) == 2)
4375 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4376 break;
4377 else
4378 other_bb = NULL;
4380 else if (stmt
4381 && final_range_test_p (stmt)
4382 && find_edge (first_bb, single_succ (e->dest)))
4384 other_bb = single_succ (e->dest);
4385 if (other_bb == first_bb)
4386 other_bb = NULL;
4389 if (other_bb == NULL)
4390 return cfg_cleanup_needed;
4392 /* Now do the forward search, moving last_bb to successor bbs
4393 that aren't other_bb. */
4394 while (EDGE_COUNT (last_bb->succs) == 2)
4396 FOR_EACH_EDGE (e, ei, last_bb->succs)
4397 if (e->dest != other_bb)
4398 break;
4399 if (e == NULL)
4400 break;
4401 if (!single_pred_p (e->dest))
4402 break;
4403 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4404 break;
4405 if (!no_side_effect_bb (e->dest))
4406 break;
4407 last_bb = e->dest;
4409 if (first_bb == last_bb)
4410 return cfg_cleanup_needed;
4411 /* Here basic blocks first_bb through last_bb's predecessor
4412 end with GIMPLE_COND, all of them have one of the edges to
4413 other_bb and another to another block in the range,
4414 all blocks except first_bb don't have side-effects and
4415 last_bb ends with either GIMPLE_COND, or cast satisfying
4416 final_range_test_p. */
4417 for (bb = last_bb; ; bb = single_pred (bb))
4419 enum tree_code code;
4420 tree lhs, rhs;
4421 inter_bb_range_test_entry bb_ent;
4423 bb_ent.op = NULL_TREE;
4424 bb_ent.first_idx = ops.length ();
4425 bb_ent.last_idx = bb_ent.first_idx;
4426 e = find_edge (bb, other_bb);
4427 stmt = last_stmt (bb);
4428 gimple_set_visited (stmt, true);
4429 if (gimple_code (stmt) != GIMPLE_COND)
4431 use_operand_p use_p;
4432 gimple *phi;
4433 edge e2;
4434 unsigned int d;
4436 lhs = gimple_assign_lhs (stmt);
4437 rhs = gimple_assign_rhs1 (stmt);
4438 gcc_assert (bb == last_bb);
4440 /* stmt is
4441 _123 = (int) _234;
4443 _234 = a_2(D) == 2;
4445 followed by:
4446 <bb M>:
4447 # _345 = PHI <_123(N), 1(...), 1(...)>
4449 or 0 instead of 1. If it is 0, the _234
4450 range test is anded together with all the
4451 other range tests, if it is 1, it is ored with
4452 them. */
4453 single_imm_use (lhs, &use_p, &phi);
4454 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4455 e2 = find_edge (first_bb, other_bb);
4456 d = e2->dest_idx;
4457 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4458 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4459 code = BIT_AND_EXPR;
4460 else
4462 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4463 code = BIT_IOR_EXPR;
4466 /* If _234 SSA_NAME_DEF_STMT is
4467 _234 = _567 | _789;
4468 (or &, corresponding to 1/0 in the phi arguments),
4469 push into ops the individual range test arguments
4470 of the bitwise or resp. and, recursively. */
4471 if (TREE_CODE (rhs) == SSA_NAME
4472 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4473 != tcc_comparison)
4474 && !get_ops (rhs, code, &ops,
4475 loop_containing_stmt (stmt))
4476 && has_single_use (rhs))
4478 /* Otherwise, push the _234 range test itself. */
4479 operand_entry *oe = operand_entry_pool.allocate ();
4481 oe->op = rhs;
4482 oe->rank = code;
4483 oe->id = 0;
4484 oe->count = 1;
4485 oe->stmt_to_insert = NULL;
4486 ops.safe_push (oe);
4487 bb_ent.last_idx++;
4488 bb_ent.op = rhs;
4490 else if (is_gimple_assign (stmt)
4491 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4492 == tcc_comparison)
4493 && !get_ops (lhs, code, &ops,
4494 loop_containing_stmt (stmt))
4495 && has_single_use (lhs))
4497 operand_entry *oe = operand_entry_pool.allocate ();
4498 oe->op = lhs;
4499 oe->rank = code;
4500 oe->id = 0;
4501 oe->count = 1;
4502 ops.safe_push (oe);
4503 bb_ent.last_idx++;
4504 bb_ent.op = lhs;
4506 else
4508 bb_ent.last_idx = ops.length ();
4509 bb_ent.op = rhs;
4511 bbinfo.safe_push (bb_ent);
4512 continue;
4514 /* Otherwise stmt is GIMPLE_COND. */
4515 code = gimple_cond_code (stmt);
4516 lhs = gimple_cond_lhs (stmt);
4517 rhs = gimple_cond_rhs (stmt);
4518 if (TREE_CODE (lhs) == SSA_NAME
4519 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4520 && ((code != EQ_EXPR && code != NE_EXPR)
4521 || rhs != boolean_false_node
4522 /* Either push into ops the individual bitwise
4523 or resp. and operands, depending on which
4524 edge is other_bb. */
4525 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4526 ^ (code == EQ_EXPR))
4527 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4528 loop_containing_stmt (stmt))))
4530 /* Or push the GIMPLE_COND stmt itself. */
4531 operand_entry *oe = operand_entry_pool.allocate ();
4533 oe->op = NULL;
4534 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4535 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4536 /* oe->op = NULL signals that there is no SSA_NAME
4537 for the range test, and oe->id instead is the
4538 number of the basic block at whose end the GIMPLE_COND
4539 is. */
4540 oe->id = bb->index;
4541 oe->count = 1;
4542 oe->stmt_to_insert = NULL;
4543 ops.safe_push (oe);
4544 bb_ent.op = NULL;
4545 bb_ent.last_idx++;
4547 else if (ops.length () > bb_ent.first_idx)
4549 bb_ent.op = lhs;
4550 bb_ent.last_idx = ops.length ();
4552 bbinfo.safe_push (bb_ent);
4553 if (bb == first_bb)
4554 break;
4556 if (ops.length () > 1)
4557 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4558 if (any_changes)
4560 unsigned int idx, max_idx = 0;
4561 /* update_ops relies on has_single_use predicates returning the
4562 same values as they did during get_ops earlier. Additionally it
4563 never removes statements, only adds new ones and it should walk
4564 from the single imm use and check the predicate already before
4565 making those changes.
4566 On the other side, the handling of GIMPLE_COND directly can turn
4567 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4568 it needs to be done in a separate loop afterwards. */
4569 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4571 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4572 && bbinfo[idx].op != NULL_TREE)
4574 tree new_op;
4576 max_idx = idx;
4577 stmt = last_stmt (bb);
4578 new_op = update_ops (bbinfo[idx].op,
4579 (enum tree_code)
4580 ops[bbinfo[idx].first_idx]->rank,
4581 ops, &bbinfo[idx].first_idx,
4582 loop_containing_stmt (stmt));
4583 if (new_op == NULL_TREE)
4585 gcc_assert (bb == last_bb);
4586 new_op = ops[bbinfo[idx].first_idx++]->op;
4588 if (bbinfo[idx].op != new_op)
4590 imm_use_iterator iter;
4591 use_operand_p use_p;
4592 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4594 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4595 if (is_gimple_debug (use_stmt))
4596 continue;
4597 else if (gimple_code (use_stmt) == GIMPLE_COND
4598 || gimple_code (use_stmt) == GIMPLE_PHI)
4599 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4600 SET_USE (use_p, new_op);
4601 else if ((is_gimple_assign (use_stmt)
4602 && (TREE_CODE_CLASS
4603 (gimple_assign_rhs_code (use_stmt))
4604 == tcc_comparison)))
4605 cast_or_tcc_cmp_stmt = use_stmt;
4606 else if (gimple_assign_cast_p (use_stmt))
4607 cast_or_tcc_cmp_stmt = use_stmt;
4608 else
4609 gcc_unreachable ();
4611 if (cast_or_tcc_cmp_stmt)
4613 gcc_assert (bb == last_bb);
4614 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4615 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4616 enum tree_code rhs_code
4617 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4618 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4619 : CONVERT_EXPR;
4620 gassign *g;
4621 if (is_gimple_min_invariant (new_op))
4623 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4624 g = gimple_build_assign (new_lhs, new_op);
4626 else
4627 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4628 gimple_stmt_iterator gsi
4629 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4630 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4631 gimple_set_visited (g, true);
4632 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4633 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4634 if (is_gimple_debug (use_stmt))
4635 continue;
4636 else if (gimple_code (use_stmt) == GIMPLE_COND
4637 || gimple_code (use_stmt) == GIMPLE_PHI)
4638 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4639 SET_USE (use_p, new_lhs);
4640 else
4641 gcc_unreachable ();
4645 if (bb == first_bb)
4646 break;
4648 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4650 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4651 && bbinfo[idx].op == NULL_TREE
4652 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4654 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4656 if (idx > max_idx)
4657 max_idx = idx;
4659 /* If we collapse the conditional to a true/false
4660 condition, then bubble that knowledge up to our caller. */
4661 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4663 gimple_cond_make_false (cond_stmt);
4664 cfg_cleanup_needed = true;
4666 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4668 gimple_cond_make_true (cond_stmt);
4669 cfg_cleanup_needed = true;
4671 else
4673 gimple_cond_set_code (cond_stmt, NE_EXPR);
4674 gimple_cond_set_lhs (cond_stmt,
4675 ops[bbinfo[idx].first_idx]->op);
4676 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4678 update_stmt (cond_stmt);
4680 if (bb == first_bb)
4681 break;
4684 /* The above changes could result in basic blocks after the first
4685 modified one, up to and including last_bb, to be executed even if
4686 they would not be in the original program. If the value ranges of
4687 assignment lhs' in those bbs were dependent on the conditions
4688 guarding those basic blocks which now can change, the VRs might
4689 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4690 are only used within the same bb, it should not be a big deal if
4691 we just reset all the VRs in those bbs. See PR68671. */
4692 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4693 reset_flow_sensitive_info_in_bb (bb);
4695 return cfg_cleanup_needed;
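/* A rough, assumed end-to-end example: for

     if (a_1 == 4) goto lab; if (a_1 == 6) goto lab;

   spread across two basic blocks, the collected ops describe the two
   singleton ranges of a_1; optimize_range_tests merges them, the
   remaining GIMPLE_COND is rewritten to something equivalent to

     _2 = a_1 & -3;
     if (_2 == 4) goto lab;

   and the now-redundant condition is folded to a constant, which is
   why cfg_cleanup_needed is reported back to the caller.  */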
4698 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4699 of STMT in its operands. This is also known as a "destructive
4700 update" operation. */
4702 static bool
4703 is_phi_for_stmt (gimple *stmt, tree operand)
4705 gimple *def_stmt;
4706 gphi *def_phi;
4707 tree lhs;
4708 use_operand_p arg_p;
4709 ssa_op_iter i;
4711 if (TREE_CODE (operand) != SSA_NAME)
4712 return false;
4714 lhs = gimple_assign_lhs (stmt);
4716 def_stmt = SSA_NAME_DEF_STMT (operand);
4717 def_phi = dyn_cast <gphi *> (def_stmt);
4718 if (!def_phi)
4719 return false;
4721 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4722 if (lhs == USE_FROM_PTR (arg_p))
4723 return true;
4724 return false;
4727 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
4728 on its rhs1 operand if so. */
4730 static void
4731 remove_visited_stmt_chain (tree var)
4733 gimple *stmt;
4734 gimple_stmt_iterator gsi;
4736 while (1)
4738 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4739 return;
4740 stmt = SSA_NAME_DEF_STMT (var);
4741 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4743 var = gimple_assign_rhs1 (stmt);
4744 gsi = gsi_for_stmt (stmt);
4745 reassoc_remove_stmt (&gsi);
4746 release_defs (stmt);
4748 else
4749 return;
4753 /* This function checks three consecutive operands in
4754 the passed operand vector OPS starting from OPINDEX and
4755 swaps two operands if it is profitable for the binary operation
4756 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4758 We pair ops with the same rank if possible.
4760 The alternative we try is to see if STMT is a destructive
4761 update style statement, which is like:
4762 b = phi (a, ...)
4763 a = c + b;
4764 In that case, we want to use the destructive update form to
4765 expose the possible vectorizer sum reduction opportunity.
4766 In that case, the third operand will be the phi node. This
4767 check is not performed if STMT is null.
4769 We could, of course, try to be better as noted above, and do a
4770 lot of work to try to find these opportunities in >3 operand
4771 cases, but it is unlikely to be worth it. */
4773 static void
4774 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4775 unsigned int opindex, gimple *stmt)
4777 operand_entry *oe1, *oe2, *oe3;
4779 oe1 = ops[opindex];
4780 oe2 = ops[opindex + 1];
4781 oe3 = ops[opindex + 2];
4783 if ((oe1->rank == oe2->rank
4784 && oe2->rank != oe3->rank)
4785 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4786 && !is_phi_for_stmt (stmt, oe1->op)
4787 && !is_phi_for_stmt (stmt, oe2->op)))
4788 std::swap (*oe1, *oe3);
4789 else if ((oe1->rank == oe3->rank
4790 && oe2->rank != oe3->rank)
4791 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4792 && !is_phi_for_stmt (stmt, oe1->op)
4793 && !is_phi_for_stmt (stmt, oe3->op)))
4794 std::swap (*oe1, *oe2);
4797 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4798 two definitions, otherwise return STMT. */
4800 static inline gimple *
4801 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4803 if (TREE_CODE (rhs1) == SSA_NAME
4804 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4805 stmt = SSA_NAME_DEF_STMT (rhs1);
4806 if (TREE_CODE (rhs2) == SSA_NAME
4807 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4808 stmt = SSA_NAME_DEF_STMT (rhs2);
4809 return stmt;
4812 /* If the stmt that defines operand has to be inserted, insert it
4813 before the use. */
4814 static void
4815 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4817 gcc_assert (is_gimple_assign (stmt_to_insert));
4818 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4819 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4820 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4821 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4822 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4824 /* If the insert point is not stmt, then insert_point would be
4825 the point where operand rhs1 or rhs2 is defined. In this case,
4826 stmt_to_insert has to be inserted afterwards. This would
4827 only happen when the stmt insertion point is flexible. */
4828 if (stmt == insert_point)
4829 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4830 else
4831 insert_stmt_after (stmt_to_insert, insert_point);
4835 /* Recursively rewrite our linearized statements so that the operators
4836 match those in OPS[OPINDEX], putting the computation in rank
4837 order. Return new lhs.
4838 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4839 the current stmt and during recursive invocations.
4840 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4841 recursive invocations. */
4843 static tree
4844 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4845 vec<operand_entry *> ops, bool changed, bool next_changed)
4847 tree rhs1 = gimple_assign_rhs1 (stmt);
4848 tree rhs2 = gimple_assign_rhs2 (stmt);
4849 tree lhs = gimple_assign_lhs (stmt);
4850 operand_entry *oe;
4852 /* The final recursion case for this function is that you have
4853 exactly two operations left.
4854 If we had exactly one op in the entire list to start with, we
4855 would have never called this function, and the tail recursion
4856 rewrites them one at a time. */
4857 if (opindex + 2 == ops.length ())
4859 operand_entry *oe1, *oe2;
4861 oe1 = ops[opindex];
4862 oe2 = ops[opindex + 1];
4864 if (rhs1 != oe1->op || rhs2 != oe2->op)
4866 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4867 unsigned int uid = gimple_uid (stmt);
4869 if (dump_file && (dump_flags & TDF_DETAILS))
4871 fprintf (dump_file, "Transforming ");
4872 print_gimple_stmt (dump_file, stmt, 0);
4875 /* If the stmt that defines operand has to be inserted, insert it
4876 before the use. */
4877 if (oe1->stmt_to_insert)
4878 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4879 if (oe2->stmt_to_insert)
4880 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4881 /* Even when changed is false, reassociation could have e.g. removed
4882 some redundant operations, so unless we are just swapping the
4883 arguments or unless there is no change at all (then we just
4884 return lhs), force creation of a new SSA_NAME. */
4885 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4887 gimple *insert_point
4888 = find_insert_point (stmt, oe1->op, oe2->op);
4889 lhs = make_ssa_name (TREE_TYPE (lhs));
4890 stmt
4891 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4892 oe1->op, oe2->op);
4893 gimple_set_uid (stmt, uid);
4894 gimple_set_visited (stmt, true);
4895 if (insert_point == gsi_stmt (gsi))
4896 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4897 else
4898 insert_stmt_after (stmt, insert_point);
4900 else
4902 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4903 == stmt);
4904 gimple_assign_set_rhs1 (stmt, oe1->op);
4905 gimple_assign_set_rhs2 (stmt, oe2->op);
4906 update_stmt (stmt);
4909 if (rhs1 != oe1->op && rhs1 != oe2->op)
4910 remove_visited_stmt_chain (rhs1);
4912 if (dump_file && (dump_flags & TDF_DETAILS))
4914 fprintf (dump_file, " into ");
4915 print_gimple_stmt (dump_file, stmt, 0);
4918 return lhs;
4921 /* If we hit here, we should have 3 or more ops left. */
4922 gcc_assert (opindex + 2 < ops.length ());
4924 /* Rewrite the next operator. */
4925 oe = ops[opindex];
4927 /* If the stmt that defines operand has to be inserted, insert it
4928 before the use. */
4929 if (oe->stmt_to_insert)
4930 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4932 /* Recurse on the LHS of the binary operator, which is guaranteed to
4933 be the non-leaf side. */
4934 tree new_rhs1
4935 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4936 changed || oe->op != rhs2 || next_changed,
4937 false);
4939 if (oe->op != rhs2 || new_rhs1 != rhs1)
4941 if (dump_file && (dump_flags & TDF_DETAILS))
4943 fprintf (dump_file, "Transforming ");
4944 print_gimple_stmt (dump_file, stmt, 0);
4947 /* If changed is false, this is either opindex == 0
4948 or all outer rhs2's were equal to corresponding oe->op,
4949 and powi_result is NULL.
4950 That means lhs is equivalent before and after reassociation.
4951 Otherwise ensure the old lhs SSA_NAME is not reused and
4952 create a new stmt as well, so that any debug stmts will be
4953 properly adjusted. */
4954 if (changed)
4956 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4957 unsigned int uid = gimple_uid (stmt);
4958 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4960 lhs = make_ssa_name (TREE_TYPE (lhs));
4961 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4962 new_rhs1, oe->op);
4963 gimple_set_uid (stmt, uid);
4964 gimple_set_visited (stmt, true);
4965 if (insert_point == gsi_stmt (gsi))
4966 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4967 else
4968 insert_stmt_after (stmt, insert_point);
4970 else
4972 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4973 == stmt);
4974 gimple_assign_set_rhs1 (stmt, new_rhs1);
4975 gimple_assign_set_rhs2 (stmt, oe->op);
4976 update_stmt (stmt);
4979 if (dump_file && (dump_flags & TDF_DETAILS))
4981 fprintf (dump_file, " into ");
4982 print_gimple_stmt (dump_file, stmt, 0);
4985 return lhs;
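/* A minimal assumed example of the rewrite: for the linearized chain

     x_6 = a_2 + b_3;
     x_7 = x_6 + c_4;

   with OPS already sorted, the recursion first rewrites the inner
   statement from the last two entries of OPS and then the outer
   statement from the new inner lhs and the preceding entry, so the
   final chain computes the operands in the order the sort chose,
   using fresh SSA names whenever CHANGED requires them.  */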
4988 /* Find out how many cycles we need to compute a statement chain.
4989 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4990 maximum number of independent statements we may execute per cycle. */
4992 static int
4993 get_required_cycles (int ops_num, int cpu_width)
4995 int res;
4996 int elog;
4997 unsigned int rest;
4999 /* While we have more than 2 * cpu_width operands
5000 we may reduce number of operands by cpu_width
5001 per cycle. */
5002 res = ops_num / (2 * cpu_width);
5004 /* The remaining operand count may be halved each cycle
5005 until we have only one operand. */
5006 rest = (unsigned)(ops_num - res * cpu_width);
5007 elog = exact_log2 (rest);
5008 if (elog >= 0)
5009 res += elog;
5010 else
5011 res += floor_log2 (rest) + 1;
5013 return res;
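/* A worked example with assumed numbers: for OPS_NUM = 10 and
   CPU_WIDTH = 2, res = 10 / 4 = 2 full-width cycles, each retiring
   CPU_WIDTH operands, leave rest = 10 - 2 * 2 = 6 values; 6 is not a
   power of two, so floor_log2 (6) + 1 = 3 further halving cycles are
   added, for 5 cycles in total.  */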
5016 /* Returns an optimal number of registers to use for computation of
5017 given statements. */
5019 static int
5020 get_reassociation_width (int ops_num, enum tree_code opc,
5021 machine_mode mode)
5023 int param_width = param_tree_reassoc_width;
5024 int width;
5025 int width_min;
5026 int cycles_best;
5028 if (param_width > 0)
5029 width = param_width;
5030 else
5031 width = targetm.sched.reassociation_width (opc, mode);
5033 if (width == 1)
5034 return width;
5036 /* Get the minimal time required for sequence computation. */
5037 cycles_best = get_required_cycles (ops_num, width);
5039 /* Check if we may use less width and still compute sequence for
5040 the same time. It will allow us to reduce registers usage.
5041 get_required_cycles is monotonically increasing with lower width
5042 so we can perform a binary search for the minimal width that still
5043 results in the optimal cycle count. */
5044 width_min = 1;
5045 while (width > width_min)
5047 int width_mid = (width + width_min) / 2;
5049 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5050 width = width_mid;
5051 else if (width_min < width_mid)
5052 width_min = width_mid;
5053 else
5054 break;
5057 return width;
5060 /* Recursively rewrite our linearized statements so that the operators
5061 match those in OPS[OPINDEX], putting the computation in rank
5062 order and trying to allow operations to be executed in
5063 parallel. */
5065 static void
5066 rewrite_expr_tree_parallel (gassign *stmt, int width,
5067 vec<operand_entry *> ops)
5069 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5070 int op_num = ops.length ();
5071 gcc_assert (op_num > 0);
5072 int stmt_num = op_num - 1;
5073 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5074 int op_index = op_num - 1;
5075 int stmt_index = 0;
5076 int ready_stmts_end = 0;
5077 int i = 0;
5078 gimple *stmt1 = NULL, *stmt2 = NULL;
5079 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5081 /* We start expression rewriting from the top statements.
5082 So, in this loop we create a full list of statements
5083 we will work with. */
5084 stmts[stmt_num - 1] = stmt;
5085 for (i = stmt_num - 2; i >= 0; i--)
5086 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5088 for (i = 0; i < stmt_num; i++)
5090 tree op1, op2;
5092 /* Determine whether we should use results of
5093 already handled statements or not. */
5094 if (ready_stmts_end == 0
5095 && (i - stmt_index >= width || op_index < 1))
5096 ready_stmts_end = i;
5098 /* Now we choose operands for the next statement. A nonzero
5099 value in ready_stmts_end means that we should use
5100 the result of already generated statements as a new operand. */
5101 if (ready_stmts_end > 0)
5103 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5104 if (ready_stmts_end > stmt_index)
5105 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5106 else if (op_index >= 0)
5108 operand_entry *oe = ops[op_index--];
5109 stmt2 = oe->stmt_to_insert;
5110 op2 = oe->op;
5112 else
5114 gcc_assert (stmt_index < i);
5115 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5118 if (stmt_index >= ready_stmts_end)
5119 ready_stmts_end = 0;
5121 else
5123 if (op_index > 1)
5124 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5125 operand_entry *oe2 = ops[op_index--];
5126 operand_entry *oe1 = ops[op_index--];
5127 op2 = oe2->op;
5128 stmt2 = oe2->stmt_to_insert;
5129 op1 = oe1->op;
5130 stmt1 = oe1->stmt_to_insert;
5133 /* If we emit the last statement then we should put
5134 operands into the last statement. It will also
5135 break the loop. */
5136 if (op_index < 0 && stmt_index == i)
5137 i = stmt_num - 1;
5139 if (dump_file && (dump_flags & TDF_DETAILS))
5141 fprintf (dump_file, "Transforming ");
5142 print_gimple_stmt (dump_file, stmts[i], 0);
5145 /* If the stmt that defines operand has to be inserted, insert it
5146 before the use. */
5147 if (stmt1)
5148 insert_stmt_before_use (stmts[i], stmt1);
5149 if (stmt2)
5150 insert_stmt_before_use (stmts[i], stmt2);
5151 stmt1 = stmt2 = NULL;
5153 /* We keep the original statement only for the last one. All
5154 others are recreated. */
5155 if (i == stmt_num - 1)
5157 gimple_assign_set_rhs1 (stmts[i], op1);
5158 gimple_assign_set_rhs2 (stmts[i], op2);
5159 update_stmt (stmts[i]);
5161 else
5163 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5164 gimple_set_visited (stmts[i], true);
5166 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, " into ");
5169 print_gimple_stmt (dump_file, stmts[i], 0);
5173 remove_visited_stmt_chain (last_rhs1);
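/* A small assumed example: rewriting a_1 + b_2 + c_3 + d_4 with
   WIDTH = 2 yields, roughly,

     t_5 = a_1 + b_2;
     t_6 = c_3 + d_4;
     t_7 = t_5 + t_6;

   where the first two additions are independent and can issue in the
   same cycle, unlike the serial chain ((a_1 + b_2) + c_3) + d_4.  */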
5176 /* Transform STMT, which is really (A + B) + (C + D), into the left
5177 linear form ((A + B) + C) + D.
5178 Recurse on D if necessary. */
5180 static void
5181 linearize_expr (gimple *stmt)
5183 gimple_stmt_iterator gsi;
5184 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5185 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5186 gimple *oldbinrhs = binrhs;
5187 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5188 gimple *newbinrhs = NULL;
5189 class loop *loop = loop_containing_stmt (stmt);
5190 tree lhs = gimple_assign_lhs (stmt);
5192 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5193 && is_reassociable_op (binrhs, rhscode, loop));
5195 gsi = gsi_for_stmt (stmt);
5197 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5198 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5199 gimple_assign_rhs_code (binrhs),
5200 gimple_assign_lhs (binlhs),
5201 gimple_assign_rhs2 (binrhs));
5202 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5203 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5204 gimple_set_uid (binrhs, gimple_uid (stmt));
5206 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5207 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5209 if (dump_file && (dump_flags & TDF_DETAILS))
5211 fprintf (dump_file, "Linearized: ");
5212 print_gimple_stmt (dump_file, stmt, 0);
5215 reassociate_stats.linearized++;
5216 update_stmt (stmt);
5218 gsi = gsi_for_stmt (oldbinrhs);
5219 reassoc_remove_stmt (&gsi);
5220 release_defs (oldbinrhs);
5222 gimple_set_visited (stmt, true);
5223 gimple_set_visited (binlhs, true);
5224 gimple_set_visited (binrhs, true);
5226 /* Tail recurse on the new rhs if it still needs reassociation. */
5227 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5228 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5229 want to change the algorithm while converting to tuples. */
5230 linearize_expr (stmt);
5233 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5234 it. Otherwise, return NULL. */
5236 static gimple *
5237 get_single_immediate_use (tree lhs)
5239 use_operand_p immuse;
5240 gimple *immusestmt;
5242 if (TREE_CODE (lhs) == SSA_NAME
5243 && single_imm_use (lhs, &immuse, &immusestmt)
5244 && is_gimple_assign (immusestmt))
5245 return immusestmt;
5247 return NULL;
5250 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5251 representing the negated value. Insertions of any necessary
5252 instructions go before GSI.
5253 This function is recursive in that, if you hand it "a_5" as the
5254 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5255 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5257 static tree
5258 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5260 gimple *negatedefstmt = NULL;
5261 tree resultofnegate;
5262 gimple_stmt_iterator gsi;
5263 unsigned int uid;
5265 /* If we are trying to negate a name, defined by an add, negate the
5266 add operands instead. */
5267 if (TREE_CODE (tonegate) == SSA_NAME)
5268 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5269 if (TREE_CODE (tonegate) == SSA_NAME
5270 && is_gimple_assign (negatedefstmt)
5271 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5272 && has_single_use (gimple_assign_lhs (negatedefstmt))
5273 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5275 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5276 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5277 tree lhs = gimple_assign_lhs (negatedefstmt);
5278 gimple *g;
5280 gsi = gsi_for_stmt (negatedefstmt);
5281 rhs1 = negate_value (rhs1, &gsi);
5283 gsi = gsi_for_stmt (negatedefstmt);
5284 rhs2 = negate_value (rhs2, &gsi);
5286 gsi = gsi_for_stmt (negatedefstmt);
5287 lhs = make_ssa_name (TREE_TYPE (lhs));
5288 gimple_set_visited (negatedefstmt, true);
5289 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5290 gimple_set_uid (g, gimple_uid (negatedefstmt));
5291 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5292 return lhs;
5295 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5296 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5297 NULL_TREE, true, GSI_SAME_STMT);
5298 gsi = *gsip;
5299 uid = gimple_uid (gsi_stmt (gsi));
5300 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5302 gimple *stmt = gsi_stmt (gsi);
5303 if (gimple_uid (stmt) != 0)
5304 break;
5305 gimple_set_uid (stmt, uid);
5307 return resultofnegate;
5310 /* Return true if we should break up the subtract in STMT into an add
5311 with negate. This is true when the subtract operands are really
5312 adds, or the subtract itself is used in an add expression. In
5313 either case, breaking up the subtract into an add with negate
5314 exposes the adds to reassociation. */
5316 static bool
5317 should_break_up_subtract (gimple *stmt)
5319 tree lhs = gimple_assign_lhs (stmt);
5320 tree binlhs = gimple_assign_rhs1 (stmt);
5321 tree binrhs = gimple_assign_rhs2 (stmt);
5322 gimple *immusestmt;
5323 class loop *loop = loop_containing_stmt (stmt);
5325 if (TREE_CODE (binlhs) == SSA_NAME
5326 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5327 return true;
5329 if (TREE_CODE (binrhs) == SSA_NAME
5330 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5331 return true;
5333 if (TREE_CODE (lhs) == SSA_NAME
5334 && (immusestmt = get_single_immediate_use (lhs))
5335 && is_gimple_assign (immusestmt)
5336 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5337 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5338 && gimple_assign_rhs1 (immusestmt) == lhs)
5339 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5340 return true;
5341 return false;
5344 /* Transform STMT from A - B into A + -B. */
5346 static void
5347 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5349 tree rhs1 = gimple_assign_rhs1 (stmt);
5350 tree rhs2 = gimple_assign_rhs2 (stmt);
5352 if (dump_file && (dump_flags & TDF_DETAILS))
5354 fprintf (dump_file, "Breaking up subtract ");
5355 print_gimple_stmt (dump_file, stmt, 0);
5358 rhs2 = negate_value (rhs2, gsip);
5359 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5360 update_stmt (stmt);
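/* A short assumed example: given

     c_5 = a_2 - b_3;
     d_7 = c_5 + e_6;

   should_break_up_subtract holds (the subtract feeds an addition),
   so the first statement is rewritten into the equivalent of

     _8 = -b_3;
     c_5 = a_2 + _8;

   which exposes both additions to reassociation.  */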
5363 /* Determine whether STMT is a builtin call that raises an SSA name
5364 to an integer power and has only one use. If so, and this is early
5365 reassociation and unsafe math optimizations are permitted, place
5366 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5367 If any of these conditions does not hold, return FALSE. */
5369 static bool
5370 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5372 tree arg1;
5373 REAL_VALUE_TYPE c, cint;
5375 switch (gimple_call_combined_fn (stmt))
5377 CASE_CFN_POW:
5378 if (flag_errno_math)
5379 return false;
5381 *base = gimple_call_arg (stmt, 0);
5382 arg1 = gimple_call_arg (stmt, 1);
5384 if (TREE_CODE (arg1) != REAL_CST)
5385 return false;
5387 c = TREE_REAL_CST (arg1);
5389 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5390 return false;
5392 *exponent = real_to_integer (&c);
5393 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5394 if (!real_identical (&c, &cint))
5395 return false;
5397 break;
5399 CASE_CFN_POWI:
5400 *base = gimple_call_arg (stmt, 0);
5401 arg1 = gimple_call_arg (stmt, 1);
5403 if (!tree_fits_shwi_p (arg1))
5404 return false;
5406 *exponent = tree_to_shwi (arg1);
5407 break;
5409 default:
5410 return false;
5413 /* Expanding negative exponents is generally unproductive, so we don't
5414 complicate matters with those. Exponents of zero and one should
5415 have been handled by expression folding. */
5416 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5417 return false;
5419 return true;
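/* Assumed examples of this predicate: a call such as

     t_3 = __builtin_pow (x_1, 4.0);

   is accepted with *BASE = x_1 and *EXPONENT = 4 (the exponent is an
   exact integer of at least 2), while pow (x_1, 0.5),
   pow (x_1, -2.0) and calls whose first argument is not an SSA name
   are all rejected.  */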
5422 /* Try to derive and add operand entry for OP to *OPS. Return false if
5423 unsuccessful. */
5425 static bool
5426 try_special_add_to_ops (vec<operand_entry *> *ops,
5427 enum tree_code code,
5428 tree op, gimple* def_stmt)
5430 tree base = NULL_TREE;
5431 HOST_WIDE_INT exponent = 0;
5433 if (TREE_CODE (op) != SSA_NAME
5434 || ! has_single_use (op))
5435 return false;
5437 if (code == MULT_EXPR
5438 && reassoc_insert_powi_p
5439 && flag_unsafe_math_optimizations
5440 && is_gimple_call (def_stmt)
5441 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5443 add_repeat_to_ops_vec (ops, base, exponent);
5444 gimple_set_visited (def_stmt, true);
5445 return true;
5447 else if (code == MULT_EXPR
5448 && is_gimple_assign (def_stmt)
5449 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5450 && !HONOR_SNANS (TREE_TYPE (op))
5451 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5452 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5454 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5455 tree cst = build_minus_one_cst (TREE_TYPE (op));
5456 add_to_ops_vec (ops, rhs1);
5457 add_to_ops_vec (ops, cst);
5458 gimple_set_visited (def_stmt, true);
5459 return true;
5462 return false;
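/* Two assumed examples: for

     t_4 = __builtin_powi (x_1, 3);
     y_5 = t_4 * z_2;

   the multiply chain records a repeated factor x_1 with count 3
   instead of the plain operand t_4 (early pass, unsafe math only);
   and for

     n_6 = -m_3;
     y_7 = n_6 * z_2;

   the negate is decomposed into the operands m_3 and -1, where the
   type permits it, so the sign participates in reassociation like
   any other factor.  */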
5465 /* Recursively linearize a binary expression that is the RHS of STMT.
5466 Place the operands of the expression tree in the vector named OPS. */
5468 static void
5469 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5470 bool is_associative, bool set_visited)
5472 tree binlhs = gimple_assign_rhs1 (stmt);
5473 tree binrhs = gimple_assign_rhs2 (stmt);
5474 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5475 bool binlhsisreassoc = false;
5476 bool binrhsisreassoc = false;
5477 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5478 class loop *loop = loop_containing_stmt (stmt);
5480 if (set_visited)
5481 gimple_set_visited (stmt, true);
5483 if (TREE_CODE (binlhs) == SSA_NAME)
5485 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5486 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5487 && !stmt_could_throw_p (cfun, binlhsdef));
5490 if (TREE_CODE (binrhs) == SSA_NAME)
5492 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5493 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5494 && !stmt_could_throw_p (cfun, binrhsdef));
5497 /* If the LHS is not reassociable, but the RHS is, we need to swap
5498 them. If neither is reassociable, there is nothing we can do, so
5499 just put them in the ops vector. If the LHS is reassociable,
5500 linearize it. If both are reassociable, then linearize the RHS
5501 and the LHS. */
5503 if (!binlhsisreassoc)
5505 /* If this is not an associative operation, like division, give up. */
5506 if (!is_associative)
5508 add_to_ops_vec (ops, binrhs);
5509 return;
5512 if (!binrhsisreassoc)
5514 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5515 add_to_ops_vec (ops, binrhs);
5517 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5518 add_to_ops_vec (ops, binlhs);
5520 return;
5523 if (dump_file && (dump_flags & TDF_DETAILS))
5525 fprintf (dump_file, "swapping operands of ");
5526 print_gimple_stmt (dump_file, stmt, 0);
5529 swap_ssa_operands (stmt,
5530 gimple_assign_rhs1_ptr (stmt),
5531 gimple_assign_rhs2_ptr (stmt));
5532 update_stmt (stmt);
5534 if (dump_file && (dump_flags & TDF_DETAILS))
5536 fprintf (dump_file, " is now ");
5537 print_gimple_stmt (dump_file, stmt, 0);
5540 /* We want to make it so the lhs is always the reassociative op,
5541 so swap. */
5542 std::swap (binlhs, binrhs);
5544 else if (binrhsisreassoc)
5546 linearize_expr (stmt);
5547 binlhs = gimple_assign_rhs1 (stmt);
5548 binrhs = gimple_assign_rhs2 (stmt);
5551 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5552 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5553 rhscode, loop));
5554 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5555 is_associative, set_visited);
5557 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5558 add_to_ops_vec (ops, binrhs);
5561 /* Repropagate the negates back into subtracts, since no other pass
5562 currently does it. */
5564 static void
5565 repropagate_negates (void)
5567 unsigned int i = 0;
5568 tree negate;
5570 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5572 gimple *user = get_single_immediate_use (negate);
5574 if (!user || !is_gimple_assign (user))
5575 continue;
5577 /* The negate operand can be either operand of a PLUS_EXPR
5578 (it can be the LHS if the RHS is a constant for example).
5580 Force the negate operand to the RHS of the PLUS_EXPR, then
5581 transform the PLUS_EXPR into a MINUS_EXPR. */
5582 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5584 /* If the negated operand appears on the LHS of the
5585 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5586 to force the negated operand to the RHS of the PLUS_EXPR. */
5587 if (gimple_assign_rhs1 (user) == negate)
5589 swap_ssa_operands (user,
5590 gimple_assign_rhs1_ptr (user),
5591 gimple_assign_rhs2_ptr (user));
5594 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5595 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5596 if (gimple_assign_rhs2 (user) == negate)
5598 tree rhs1 = gimple_assign_rhs1 (user);
5599 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5600 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5601 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5602 update_stmt (user);
5605 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5607 if (gimple_assign_rhs1 (user) == negate)
5609 /* We have
5610 x = -a
5611 y = x - b
5612 which we transform into
5613 x = a + b
5614 y = -x .
5615 This pushes down the negate which we possibly can merge
5616 into some other operation, hence insert it into the
5617 plus_negates vector. */
5618 gimple *feed = SSA_NAME_DEF_STMT (negate);
5619 tree a = gimple_assign_rhs1 (feed);
5620 tree b = gimple_assign_rhs2 (user);
5621 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5622 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5623 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5624 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5625 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5626 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5627 user = gsi_stmt (gsi2);
5628 update_stmt (user);
5629 reassoc_remove_stmt (&gsi);
5630 release_defs (feed);
5631 plus_negates.safe_push (gimple_assign_lhs (user));
5633 else
5635 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5636 rid of one operation. */
5637 gimple *feed = SSA_NAME_DEF_STMT (negate);
5638 tree a = gimple_assign_rhs1 (feed);
5639 tree rhs1 = gimple_assign_rhs1 (user);
5640 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5641 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5642 update_stmt (gsi_stmt (gsi));
5648 /* Returns true if OP is of a type for which we can do reassociation.
5649 That is for integral or non-saturating fixed-point types, and for
5650 floating-point types when associative-math is enabled. */
5652 static bool
5653 can_reassociate_p (tree op)
5655 tree type = TREE_TYPE (op);
5656 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5657 return false;
5658 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5659 || NON_SAT_FIXED_POINT_TYPE_P (type)
5660 || (flag_associative_math && FLOAT_TYPE_P (type)))
5661 return true;
5662 return false;
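/* Assumed examples of what qualifies:

     unsigned int u;   OK, overflow wraps
     int i;            refused unless overflow wraps (e.g. -fwrapv)
     double d;         OK only when -fassociative-math is in effect

   and SSA names occurring in abnormal PHIs are always refused.  */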
5665 /* Break up subtract operations in block BB.
5667 We do this top down because we don't know whether the subtract is
5668 part of a possible chain of reassociation except at the top.
5670 IE given
5671 d = f + g
5672 c = a + e
5673 b = c - d
5674 q = b - r
5675 k = t - q
5677 we want to break up k = t - q, but we won't until we've transformed q
5678 = b - r, which won't be broken up until we transform b = c - d.
5680 En passant, clear the GIMPLE visited flag on every statement
5681 and set UIDs within each basic block. */
5683 static void
5684 break_up_subtract_bb (basic_block bb)
5686 gimple_stmt_iterator gsi;
5687 basic_block son;
5688 unsigned int uid = 1;
5690 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5692 gimple *stmt = gsi_stmt (gsi);
5693 gimple_set_visited (stmt, false);
5694 gimple_set_uid (stmt, uid++);
5696 if (!is_gimple_assign (stmt)
5697 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5698 continue;
5700 /* Look for simple gimple subtract operations. */
5701 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5703 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5704 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5705 continue;
5707 /* Check for a subtract used only in an addition. If this
5708 is the case, transform it into add of a negate for better
5709 reassociation. IE transform C = A-B into C = A + -B if C
5710 is only used in an addition. */
5711 if (should_break_up_subtract (stmt))
5712 break_up_subtract (stmt, &gsi);
5714 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5715 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5716 plus_negates.safe_push (gimple_assign_lhs (stmt));
5718 for (son = first_dom_son (CDI_DOMINATORS, bb);
5719 son;
5720 son = next_dom_son (CDI_DOMINATORS, son))
5721 break_up_subtract_bb (son);
5724 /* Used for repeated factor analysis. */
5725 struct repeat_factor
5727 /* An SSA name that occurs in a multiply chain. */
5728 tree factor;
5730 /* Cached rank of the factor. */
5731 unsigned rank;
5733 /* Number of occurrences of the factor in the chain. */
5734 HOST_WIDE_INT count;
5736 /* An SSA name representing the product of this factor and
5737 all factors appearing later in the repeated factor vector. */
5738 tree repr;
5742 static vec<repeat_factor> repeat_factor_vec;
5744 /* Used for sorting the repeat factor vector. Sort primarily by
5745 ascending occurrence count, secondarily by descending rank. */
5747 static int
5748 compare_repeat_factors (const void *x1, const void *x2)
5750 const repeat_factor *rf1 = (const repeat_factor *) x1;
5751 const repeat_factor *rf2 = (const repeat_factor *) x2;
5753 if (rf1->count != rf2->count)
5754 return rf1->count - rf2->count;
5756 return rf2->rank - rf1->rank;
5759 /* Look for repeated operands in OPS in the multiply tree rooted at
5760 STMT. Replace them with an optimal sequence of multiplies and powi
5761 builtin calls, and remove the used operands from OPS. Return an
5762 SSA name representing the value of the replacement sequence. */
5764 static tree
5765 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5767 unsigned i, j, vec_len;
5768 int ii;
5769 operand_entry *oe;
5770 repeat_factor *rf1, *rf2;
5771 repeat_factor rfnew;
5772 tree result = NULL_TREE;
5773 tree target_ssa, iter_result;
5774 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5775 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5776 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5777 gimple *mul_stmt, *pow_stmt;
5779 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5780 target. */
5781 if (!powi_fndecl)
5782 return NULL_TREE;
5784 /* Allocate the repeated factor vector. */
5785 repeat_factor_vec.create (10);
5787 /* Scan the OPS vector for all SSA names in the product and build
5788 up a vector of occurrence counts for each factor. */
5789 FOR_EACH_VEC_ELT (*ops, i, oe)
5791 if (TREE_CODE (oe->op) == SSA_NAME)
5793 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5795 if (rf1->factor == oe->op)
5797 rf1->count += oe->count;
5798 break;
5802 if (j >= repeat_factor_vec.length ())
5804 rfnew.factor = oe->op;
5805 rfnew.rank = oe->rank;
5806 rfnew.count = oe->count;
5807 rfnew.repr = NULL_TREE;
5808 repeat_factor_vec.safe_push (rfnew);
5813 /* Sort the repeated factor vector by (a) increasing occurrence count,
5814 and (b) decreasing rank. */
5815 repeat_factor_vec.qsort (compare_repeat_factors);
5817 /* It is generally best to combine as many base factors as possible
5818 into a product before applying __builtin_powi to the result.
5819 However, the sort order chosen for the repeated factor vector
5820 allows us to cache partial results for the product of the base
5821 factors for subsequent use. When we already have a cached partial
5822 result from a previous iteration, it is best to make use of it
5823 before looking for another __builtin_powi opportunity.
5825 As an example, consider x * x * y * y * y * z * z * z * z.
5826 We want to first compose the product x * y * z, raise it to the
5827 second power, then multiply this by y * z, and finally multiply
5828 by z. This can be done in 5 multiplies provided we cache y * z
5829 for use in both expressions:
5831 t1 = y * z
5832 t2 = t1 * x
5833 t3 = t2 * t2
5834 t4 = t1 * t3
5835 result = t4 * z
5837 If we instead ignored the cached y * z and first multiplied by
5838 the __builtin_powi opportunity z * z, we would get the inferior:
5840 t1 = y * z
5841 t2 = t1 * x
5842 t3 = t2 * t2
5843 t4 = z * z
5844 t5 = t3 * t4
5845 result = t5 * y */
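/* In the generated code the squaring step is emitted as a
   __builtin_powi call rather than an explicit multiply, so for the
   example above the sequence looks roughly like
     t1 = y * z;
     t2 = x * t1;
     t3 = __builtin_powi (t2, 2);
     t4 = t3 * t1;
     result = t4 * z;
   (illustrative; exact temporaries and order may differ).  */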
5847 vec_len = repeat_factor_vec.length ();
5849 /* Repeatedly look for opportunities to create a builtin_powi call. */
5850 while (true)
5852 HOST_WIDE_INT power;
5854 /* First look for the largest cached product of factors from
5855 preceding iterations. If found, create a builtin_powi for
5856 it if the minimum occurrence count for its factors is at
5857 least 2, or just use this cached product as our next
5858 multiplicand if the minimum occurrence count is 1. */
5859 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5861 if (rf1->repr && rf1->count > 0)
5862 break;
5865 if (j < vec_len)
5867 power = rf1->count;
5869 if (power == 1)
5871 iter_result = rf1->repr;
5873 if (dump_file && (dump_flags & TDF_DETAILS))
5875 unsigned elt;
5876 repeat_factor *rf;
5877 fputs ("Multiplying by cached product ", dump_file);
5878 for (elt = j; elt < vec_len; elt++)
5880 rf = &repeat_factor_vec[elt];
5881 print_generic_expr (dump_file, rf->factor);
5882 if (elt < vec_len - 1)
5883 fputs (" * ", dump_file);
5885 fputs ("\n", dump_file);
5888 else
5890 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5891 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5892 build_int_cst (integer_type_node,
5893 power));
5894 gimple_call_set_lhs (pow_stmt, iter_result);
5895 gimple_set_location (pow_stmt, gimple_location (stmt));
5896 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5897 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5899 if (dump_file && (dump_flags & TDF_DETAILS))
5901 unsigned elt;
5902 repeat_factor *rf;
5903 fputs ("Building __builtin_pow call for cached product (",
5904 dump_file);
5905 for (elt = j; elt < vec_len; elt++)
5907 rf = &repeat_factor_vec[elt];
5908 print_generic_expr (dump_file, rf->factor);
5909 if (elt < vec_len - 1)
5910 fputs (" * ", dump_file);
5912 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5913 power);
5917 else
5919 /* Otherwise, find the first factor in the repeated factor
5920 vector whose occurrence count is at least 2. If no such
5921 factor exists, there are no builtin_powi opportunities
5922 remaining. */
5923 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5925 if (rf1->count >= 2)
5926 break;
5929 if (j >= vec_len)
5930 break;
5932 power = rf1->count;
5934 if (dump_file && (dump_flags & TDF_DETAILS))
5936 unsigned elt;
5937 repeat_factor *rf;
5938 fputs ("Building __builtin_pow call for (", dump_file);
5939 for (elt = j; elt < vec_len; elt++)
5941 rf = &repeat_factor_vec[elt];
5942 print_generic_expr (dump_file, rf->factor);
5943 if (elt < vec_len - 1)
5944 fputs (" * ", dump_file);
5946 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5949 reassociate_stats.pows_created++;
5951 /* Visit each element of the vector in reverse order (so that
5952 high-occurrence elements are visited first, and within the
5953 same occurrence count, lower-ranked elements are visited
5954 first). Form a linear product of all elements in this order
5955 whose occurrence count is at least that of element J.
5956 Record the SSA name representing the product of each element
5957 with all subsequent elements in the vector. */
5958 if (j == vec_len - 1)
5959 rf1->repr = rf1->factor;
5960 else
5962 for (ii = vec_len - 2; ii >= (int)j; ii--)
5964 tree op1, op2;
5966 rf1 = &repeat_factor_vec[ii];
5967 rf2 = &repeat_factor_vec[ii + 1];
5969 /* Init the last factor's representative to be itself. */
5970 if (!rf2->repr)
5971 rf2->repr = rf2->factor;
5973 op1 = rf1->factor;
5974 op2 = rf2->repr;
5976 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5977 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5978 op1, op2);
5979 gimple_set_location (mul_stmt, gimple_location (stmt));
5980 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5981 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5982 rf1->repr = target_ssa;
5984 /* Don't reprocess the multiply we just introduced. */
5985 gimple_set_visited (mul_stmt, true);
5989 /* Form a call to __builtin_powi for the maximum product
5990 just formed, raised to the power obtained earlier. */
5991 rf1 = &repeat_factor_vec[j];
5992 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5993 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5994 build_int_cst (integer_type_node,
5995 power));
5996 gimple_call_set_lhs (pow_stmt, iter_result);
5997 gimple_set_location (pow_stmt, gimple_location (stmt));
5998 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5999 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6002 /* If we previously formed at least one other builtin_powi call,
6003 form the product of this one and those others. */
6004 if (result)
6006 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6007 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6008 result, iter_result);
6009 gimple_set_location (mul_stmt, gimple_location (stmt));
6010 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6011 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6012 gimple_set_visited (mul_stmt, true);
6013 result = new_result;
6015 else
6016 result = iter_result;
6018 /* Decrement the occurrence count of each element in the product
6019 by the count found above, and remove this many copies of each
6020 factor from OPS. */
6021 for (i = j; i < vec_len; i++)
6023 unsigned k = power;
6024 unsigned n;
6026 rf1 = &repeat_factor_vec[i];
6027 rf1->count -= power;
6029 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6031 if (oe->op == rf1->factor)
6033 if (oe->count <= k)
6035 ops->ordered_remove (n);
6036 k -= oe->count;
6038 if (k == 0)
6039 break;
6041 else
6043 oe->count -= k;
6044 break;
6051 /* At this point all elements in the repeated factor vector have a
6052 remaining occurrence count of 0 or 1, and those with a count of 1
6053 don't have cached representatives. Re-sort the ops vector and
6054 clean up. */
6055 ops->qsort (sort_by_operand_rank);
6056 repeat_factor_vec.release ();
6058 /* Return the final product computed herein. Note that there may
6059 still be some elements with single occurrence count left in OPS;
6060 those will be handled by the normal reassociation logic. */
6061 return result;
6064 /* Attempt to optimize
6065 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6066 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
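/* E.g. (illustrative): if the constant operand is 2.0 and another
   operand is defined by t = copysign (3.0, y), the constant is
   popped from OPS and that operand is replaced by the result of a
   new call copysign (6.0, y).  A negative constant additionally
   negates the result, per the second pattern above.  */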
6068 static void
6069 attempt_builtin_copysign (vec<operand_entry *> *ops)
6071 operand_entry *oe;
6072 unsigned int i;
6073 unsigned int length = ops->length ();
6074 tree cst = ops->last ()->op;
6076 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6077 return;
6079 FOR_EACH_VEC_ELT (*ops, i, oe)
6081 if (TREE_CODE (oe->op) == SSA_NAME
6082 && has_single_use (oe->op))
6084 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6085 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6087 tree arg0, arg1;
6088 switch (gimple_call_combined_fn (old_call))
6090 CASE_CFN_COPYSIGN:
6091 CASE_CFN_COPYSIGN_FN:
6092 arg0 = gimple_call_arg (old_call, 0);
6093 arg1 = gimple_call_arg (old_call, 1);
6094 /* The first argument of copysign must be a constant,
6095 otherwise there's nothing to do. */
6096 if (TREE_CODE (arg0) == REAL_CST)
6098 tree type = TREE_TYPE (arg0);
6099 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6100 /* If we couldn't fold to a single constant, skip it.
6101 That happens e.g. for inexact multiplication when
6102 -frounding-math. */
6103 if (mul == NULL_TREE)
6104 break;
6105 /* Instead of adjusting OLD_CALL, let's build a new
6106 call to not leak the LHS and prevent keeping bogus
6107 debug statements. DCE will clean up the old call. */
6108 gcall *new_call;
6109 if (gimple_call_internal_p (old_call))
6110 new_call = gimple_build_call_internal
6111 (IFN_COPYSIGN, 2, mul, arg1);
6112 else
6113 new_call = gimple_build_call
6114 (gimple_call_fndecl (old_call), 2, mul, arg1);
6115 tree lhs = make_ssa_name (type);
6116 gimple_call_set_lhs (new_call, lhs);
6117 gimple_set_location (new_call,
6118 gimple_location (old_call));
6119 insert_stmt_after (new_call, old_call);
6120 /* We've used the constant, get rid of it. */
6121 ops->pop ();
6122 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6123 /* Handle the CST1 < 0 case by negating the result. */
6124 if (cst1_neg)
6126 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6127 gimple *negate_stmt
6128 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6129 insert_stmt_after (negate_stmt, new_call);
6130 oe->op = negrhs;
6132 else
6133 oe->op = lhs;
6134 if (dump_file && (dump_flags & TDF_DETAILS))
6136 fprintf (dump_file, "Optimizing copysign: ");
6137 print_generic_expr (dump_file, cst);
6138 fprintf (dump_file, " * COPYSIGN (");
6139 print_generic_expr (dump_file, arg0);
6140 fprintf (dump_file, ", ");
6141 print_generic_expr (dump_file, arg1);
6142 fprintf (dump_file, ") into %sCOPYSIGN (",
6143 cst1_neg ? "-" : "");
6144 print_generic_expr (dump_file, mul);
6145 fprintf (dump_file, ", ");
6146 print_generic_expr (dump_file, arg1);
6147 fprintf (dump_file, ")\n");
6149 return;
6151 break;
6152 default:
6153 break;
6160 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6162 static void
6163 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6165 tree rhs1;
6167 if (dump_file && (dump_flags & TDF_DETAILS))
6169 fprintf (dump_file, "Transforming ");
6170 print_gimple_stmt (dump_file, stmt, 0);
6173 rhs1 = gimple_assign_rhs1 (stmt);
6174 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6175 update_stmt (stmt);
6176 remove_visited_stmt_chain (rhs1);
6178 if (dump_file && (dump_flags & TDF_DETAILS))
6180 fprintf (dump_file, " into ");
6181 print_gimple_stmt (dump_file, stmt, 0);
6185 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6187 static void
6188 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6189 tree rhs1, tree rhs2)
6191 if (dump_file && (dump_flags & TDF_DETAILS))
6193 fprintf (dump_file, "Transforming ");
6194 print_gimple_stmt (dump_file, stmt, 0);
6197 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6198 update_stmt (gsi_stmt (*gsi));
6199 remove_visited_stmt_chain (rhs1);
6201 if (dump_file && (dump_flags & TDF_DETAILS))
6203 fprintf (dump_file, " into ");
6204 print_gimple_stmt (dump_file, stmt, 0);
6208 /* Reassociate expressions in basic block BB and its post-dominator
6209 children.
6211 Bubble up return status from maybe_optimize_range_tests. */
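/* Note that statements are scanned backwards within the block (see
   the gsi_last_bb loop below), so the root of each expression tree
   is reached before the statements feeding it; the feeding
   statements are marked visited during linearization and skipped
   (or removed if dead) when the walk reaches them.  */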
6213 static bool
6214 reassociate_bb (basic_block bb)
6216 gimple_stmt_iterator gsi;
6217 basic_block son;
6218 gimple *stmt = last_stmt (bb);
6219 bool cfg_cleanup_needed = false;
6221 if (stmt && !gimple_visited_p (stmt))
6222 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6224 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
6226 stmt = gsi_stmt (gsi);
6228 if (is_gimple_assign (stmt)
6229 && !stmt_could_throw_p (cfun, stmt))
6231 tree lhs, rhs1, rhs2;
6232 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6234 /* If this was part of an already processed statement,
6235 we don't need to touch it again. */
6236 if (gimple_visited_p (stmt))
6238 /* This statement might have become dead because of previous
6239 reassociations. */
6240 if (has_zero_uses (gimple_get_lhs (stmt)))
6242 reassoc_remove_stmt (&gsi);
6243 release_defs (stmt);
6244 /* We might end up removing the last stmt above, which
6245 leaves the iterator at the end of the sequence.
6246 Reset it to the last stmt of the block in that case;
6247 if it is still at the end of the sequence (because we
6248 also removed the block's final statement), bail
6249 out. */
6250 if (gsi_end_p (gsi))
6252 gsi = gsi_last_bb (bb);
6253 if (gsi_end_p (gsi))
6254 break;
6257 continue;
6260 /* If this is not a gimple binary expression, there is
6261 nothing for us to do with it. */
6262 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6263 continue;
6265 lhs = gimple_assign_lhs (stmt);
6266 rhs1 = gimple_assign_rhs1 (stmt);
6267 rhs2 = gimple_assign_rhs2 (stmt);
6269 /* For non-bit or min/max operations we can't associate
6270 all types. Verify that here. */
6271 if (rhs_code != BIT_IOR_EXPR
6272 && rhs_code != BIT_AND_EXPR
6273 && rhs_code != BIT_XOR_EXPR
6274 && rhs_code != MIN_EXPR
6275 && rhs_code != MAX_EXPR
6276 && (!can_reassociate_p (lhs)
6277 || !can_reassociate_p (rhs1)
6278 || !can_reassociate_p (rhs2)))
6279 continue;
6281 if (associative_tree_code (rhs_code))
6283 auto_vec<operand_entry *> ops;
6284 tree powi_result = NULL_TREE;
6285 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6287 /* There may be no immediate uses left by the time we
6288 get here because we may have eliminated them all. */
6289 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6290 continue;
6292 gimple_set_visited (stmt, true);
6293 linearize_expr_tree (&ops, stmt, true, true);
6294 ops.qsort (sort_by_operand_rank);
6295 int orig_len = ops.length ();
6296 optimize_ops_list (rhs_code, &ops);
6297 if (undistribute_ops_list (rhs_code, &ops,
6298 loop_containing_stmt (stmt)))
6300 ops.qsort (sort_by_operand_rank);
6301 optimize_ops_list (rhs_code, &ops);
6303 if (undistribute_bitref_for_vector (rhs_code, &ops,
6304 loop_containing_stmt (stmt)))
6306 ops.qsort (sort_by_operand_rank);
6307 optimize_ops_list (rhs_code, &ops);
6309 if (rhs_code == PLUS_EXPR
6310 && transform_add_to_multiply (&ops))
6311 ops.qsort (sort_by_operand_rank);
6313 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6315 if (is_vector)
6316 optimize_vec_cond_expr (rhs_code, &ops);
6317 else
6318 optimize_range_tests (rhs_code, &ops, NULL);
6321 if (rhs_code == MULT_EXPR && !is_vector)
6323 attempt_builtin_copysign (&ops);
6325 if (reassoc_insert_powi_p
6326 && flag_unsafe_math_optimizations)
6327 powi_result = attempt_builtin_powi (stmt, &ops);
6330 operand_entry *last;
6331 bool negate_result = false;
6332 if (ops.length () > 1
6333 && rhs_code == MULT_EXPR)
6335 last = ops.last ();
6336 if ((integer_minus_onep (last->op)
6337 || real_minus_onep (last->op))
6338 && !HONOR_SNANS (TREE_TYPE (lhs))
6339 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6340 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6342 ops.pop ();
6343 negate_result = true;
6347 tree new_lhs = lhs;
6348 /* If the operand vector is now empty, all operands were
6349 consumed by the __builtin_powi optimization. */
6350 if (ops.length () == 0)
6351 transform_stmt_to_copy (&gsi, stmt, powi_result);
6352 else if (ops.length () == 1)
6354 tree last_op = ops.last ()->op;
6356 /* If the stmt that defines operand has to be inserted, insert it
6357 before the use. */
6358 if (ops.last ()->stmt_to_insert)
6359 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6360 if (powi_result)
6361 transform_stmt_to_multiply (&gsi, stmt, last_op,
6362 powi_result);
6363 else
6364 transform_stmt_to_copy (&gsi, stmt, last_op);
6366 else
6368 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6369 int ops_num = ops.length ();
6370 int width;
6372 /* For binary bit operations, if there are at least 3
6373 operands and the last operand in OPS is a constant,
6374 move it to the front. This helps ensure that we generate
6375 (X & Y) & C rather than (X & C) & Y. The former will
6376 often match a canonical bit test when we get to RTL. */
6377 if (ops.length () > 2
6378 && (rhs_code == BIT_AND_EXPR
6379 || rhs_code == BIT_IOR_EXPR
6380 || rhs_code == BIT_XOR_EXPR)
6381 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6382 std::swap (*ops[0], *ops[ops_num - 1]);
6384 /* Only rewrite the expression tree into a parallel form in the
6385 last reassoc pass, to avoid useless back-and-forth work
6386 with the initial linearization. */
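/* For instance (illustrative), with a chosen width of 2 the chain
   a + b + c + d is emitted as two independent additions
     t1 = a + b;
     t2 = c + d;
     lhs = t1 + t2;
   instead of the fully linear ((a + b) + c) + d.  */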
6387 if (!reassoc_insert_powi_p
6388 && ops.length () > 3
6389 && (width = get_reassociation_width (ops_num, rhs_code,
6390 mode)) > 1)
6392 if (dump_file && (dump_flags & TDF_DETAILS))
6393 fprintf (dump_file,
6394 "Width = %d was chosen for reassociation\n",
6395 width);
6396 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6397 width, ops);
6399 else
6401 /* When there are three operands left, we want
6402 to make sure the ones that get the double
6403 binary op are chosen wisely. */
6404 int len = ops.length ();
6405 if (len >= 3)
6406 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6408 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6409 powi_result != NULL
6410 || negate_result,
6411 len != orig_len);
6414 /* If we combined some repeated factors into a
6415 __builtin_powi call, multiply that result by the
6416 reassociated operands. */
6417 if (powi_result)
6419 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6420 tree type = TREE_TYPE (lhs);
6421 tree target_ssa = make_temp_ssa_name (type, NULL,
6422 "reassocpow");
6423 gimple_set_lhs (lhs_stmt, target_ssa);
6424 update_stmt (lhs_stmt);
6425 if (lhs != new_lhs)
6427 target_ssa = new_lhs;
6428 new_lhs = lhs;
6430 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6431 powi_result, target_ssa);
6432 gimple_set_location (mul_stmt, gimple_location (stmt));
6433 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6434 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6438 if (negate_result)
6440 stmt = SSA_NAME_DEF_STMT (lhs);
6441 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6442 gimple_set_lhs (stmt, tmp);
6443 if (lhs != new_lhs)
6444 tmp = new_lhs;
6445 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6446 tmp);
6447 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6448 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6449 update_stmt (stmt);
6454 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6455 son;
6456 son = next_dom_son (CDI_POST_DOMINATORS, son))
6457 cfg_cleanup_needed |= reassociate_bb (son);
6459 return cfg_cleanup_needed;
6462 /* Add jumps around shifts for range tests turned into bit tests.
6463 For each SSA_NAME VAR we have code like:
6464 VAR = ...; // final stmt of range comparison
6465 // bit test here...;
6466 OTHERVAR = ...; // final stmt of the bit test sequence
6467 RES = VAR | OTHERVAR;
6468 Turn the above into:
6469 VAR = ...; // <l1>
6470 if (VAR != 0)
6471 goto <l3>;
6472 else
6473 goto <l2>;
6474 <l2>:
6475 // bit test here...;
6476 OTHERVAR = ...;
6477 <l3>:
6478 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6480 static void
6481 branch_fixup (void)
6483 tree var;
6484 unsigned int i;
6486 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6488 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6489 gimple *use_stmt;
6490 use_operand_p use;
6491 bool ok = single_imm_use (var, &use, &use_stmt);
6492 gcc_assert (ok
6493 && is_gimple_assign (use_stmt)
6494 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6495 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6497 basic_block cond_bb = gimple_bb (def_stmt);
6498 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6499 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6501 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6502 gimple *g = gimple_build_cond (NE_EXPR, var,
6503 build_zero_cst (TREE_TYPE (var)),
6504 NULL_TREE, NULL_TREE);
6505 location_t loc = gimple_location (use_stmt);
6506 gimple_set_location (g, loc);
6507 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6509 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6510 etrue->probability = profile_probability::even ();
6511 edge efalse = find_edge (cond_bb, then_bb);
6512 efalse->flags = EDGE_FALSE_VALUE;
6513 efalse->probability -= etrue->probability;
6514 then_bb->count -= etrue->count ();
6516 tree othervar = NULL_TREE;
6517 if (gimple_assign_rhs1 (use_stmt) == var)
6518 othervar = gimple_assign_rhs2 (use_stmt);
6519 else if (gimple_assign_rhs2 (use_stmt) == var)
6520 othervar = gimple_assign_rhs1 (use_stmt);
6521 else
6522 gcc_unreachable ();
6523 tree lhs = gimple_assign_lhs (use_stmt);
6524 gphi *phi = create_phi_node (lhs, merge_bb);
6525 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6526 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6527 gsi = gsi_for_stmt (use_stmt);
6528 gsi_remove (&gsi, true);
6530 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6531 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6533 reassoc_branch_fixups.release ();
6536 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6537 void debug_ops_vector (vec<operand_entry *> ops);
6539 /* Dump the operand entry vector OPS to FILE. */
6541 void
6542 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6544 operand_entry *oe;
6545 unsigned int i;
6547 FOR_EACH_VEC_ELT (ops, i, oe)
6549 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6550 print_generic_expr (file, oe->op);
6551 fprintf (file, "\n");
6555 /* Dump the operand entry vector OPS to STDERR. */
6557 DEBUG_FUNCTION void
6558 debug_ops_vector (vec<operand_entry *> ops)
6560 dump_ops_vector (stderr, ops);
6563 /* Bubble up return status from reassociate_bb. */
6565 static bool
6566 do_reassoc (void)
6568 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6569 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6572 /* Initialize the reassociation pass. */
6574 static void
6575 init_reassoc (void)
6577 int i;
6578 long rank = 2;
6579 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6581 /* Find the loops, so that we can prevent moving calculations in
6582 them. */
6583 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6585 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6587 next_operand_entry_id = 0;
6589 /* Reverse post order (RPO) will give us an ordering where
6590 deeper loops come later.
6591 pre_and_rev_post_order_compute (NULL, bbs, false);
6592 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6593 operand_rank = new hash_map<tree, long>;
6595 /* Give each default definition a distinct rank. This includes
6596 parameters and the static chain. Walk backwards over all
6597 SSA names so that we get proper rank ordering according
6598 to tree_swap_operands_p. */
6599 for (i = num_ssa_names - 1; i > 0; --i)
6601 tree name = ssa_name (i);
6602 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6603 insert_operand_rank (name, ++rank);
6606 /* Set up rank for each BB */
6607 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6608 bb_rank[bbs[i]] = ++rank << 16;
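/* E.g. (illustrative): with two default-def SSA names, RANK reaches 4
   in the loop over SSA names above, so the first block in the RPO
   ordering receives bb_rank 5 << 16, the next 6 << 16, and so on.  */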
6610 free (bbs);
6611 calculate_dominance_info (CDI_POST_DOMINATORS);
6612 plus_negates = vNULL;
6615 /* Clean up after the reassociation pass, and print stats if
6616 requested. */
6618 static void
6619 fini_reassoc (void)
6621 statistics_counter_event (cfun, "Linearized",
6622 reassociate_stats.linearized);
6623 statistics_counter_event (cfun, "Constants eliminated",
6624 reassociate_stats.constants_eliminated);
6625 statistics_counter_event (cfun, "Ops eliminated",
6626 reassociate_stats.ops_eliminated);
6627 statistics_counter_event (cfun, "Statements rewritten",
6628 reassociate_stats.rewritten);
6629 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6630 reassociate_stats.pows_encountered);
6631 statistics_counter_event (cfun, "Built-in powi calls created",
6632 reassociate_stats.pows_created);
6634 delete operand_rank;
6635 operand_entry_pool.release ();
6636 free (bb_rank);
6637 plus_negates.release ();
6638 free_dominance_info (CDI_POST_DOMINATORS);
6639 loop_optimizer_finalize ();
6642 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6643 insertion of __builtin_powi calls.
6645 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6646 optimization of a gimple conditional. Otherwise returns zero. */
6648 static unsigned int
6649 execute_reassoc (bool insert_powi_p)
6651 reassoc_insert_powi_p = insert_powi_p;
6653 init_reassoc ();
6655 bool cfg_cleanup_needed;
6656 cfg_cleanup_needed = do_reassoc ();
6657 repropagate_negates ();
6658 branch_fixup ();
6660 fini_reassoc ();
6661 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6664 namespace {
6666 const pass_data pass_data_reassoc =
6668 GIMPLE_PASS, /* type */
6669 "reassoc", /* name */
6670 OPTGROUP_NONE, /* optinfo_flags */
6671 TV_TREE_REASSOC, /* tv_id */
6672 ( PROP_cfg | PROP_ssa ), /* properties_required */
6673 0, /* properties_provided */
6674 0, /* properties_destroyed */
6675 0, /* todo_flags_start */
6676 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6679 class pass_reassoc : public gimple_opt_pass
6681 public:
6682 pass_reassoc (gcc::context *ctxt)
6683 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6686 /* opt_pass methods: */
6687 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6688 void set_pass_param (unsigned int n, bool param)
6690 gcc_assert (n == 0);
6691 insert_powi_p = param;
6693 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6694 virtual unsigned int execute (function *)
6695 { return execute_reassoc (insert_powi_p); }
6697 private:
6698 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6699 point 3a in the pass header comment. */
6700 bool insert_powi_p;
6701 }; // class pass_reassoc
6703 } // anon namespace
6705 gimple_opt_pass *
6706 make_pass_reassoc (gcc::context *ctxt)
6708 return new pass_reassoc (ctxt);