1 /* Reassociation for trees.
2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 Because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
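/* A rough illustration of steps 1-4 on a tiny example (hypothetical
   input, for orientation only).  Given

     w = (a - b) + (b + c)

   step 1 breaks the subtract into an add of a negate, t = -b;
   step 2 linearizes the PLUS_EXPR chain into the operand list
   {a, t, b, c}; step 3 cancels the t/b pair (the a + -a case), leaving
   {a, c} once the resulting zero is dropped; and step 4 rewrites the
   statements from that list in rank order, e.g. w = a + c.  */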
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 unsigned int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static unsigned int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted - the basic block is
218 split right after that SSA_NAME's definition statement and before
219 its only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
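/* A sketch of the intended arithmetic, under the assumption that block
   base ranks are spaced 1 << 16 apart (as the bb_rank[...] >> 16
   comparison in sort_by_operand_rank below suggests): get_rank adds one
   per operation in an expression tree, so any tree we reassociate stays
   far below 1 << 15, while adding 1 << 15 can never push a rank past
   the base rank of a later block.  */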
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 class loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAMEs have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal; however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 /* If we already have a rank for this expression, use that. */
429 rank = find_operand_rank (e);
430 if (rank != -1)
431 return rank;
433 stmt = SSA_NAME_DEF_STMT (e);
434 if (gimple_code (stmt) == GIMPLE_PHI)
435 rank = phi_rank (stmt);
437 else if (!is_gimple_assign (stmt))
438 rank = bb_rank[gimple_bb (stmt)->index];
440 else
442 /* Otherwise, find the maximum rank for the operands. As an
443 exception, remove the bias from loop-carried phis when propagating
444 the rank so that dependent operations are not also biased. */
445 /* Simply walk over all SSA uses - this takes advantage of the
446 fact that non-SSA operands satisfy is_gimple_min_invariant and
447 thus have rank 0. */
448 rank = 0;
449 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
450 rank = propagate_rank (rank, op);
452 rank += 1;
455 if (dump_file && (dump_flags & TDF_DETAILS))
457 fprintf (dump_file, "Rank for ");
458 print_generic_expr (dump_file, e);
459 fprintf (dump_file, " is %ld\n", rank);
462 /* Note the rank in the hashtable so we don't recompute it. */
463 insert_operand_rank (e, rank);
464 return rank;
467 /* Constants, globals, etc., are rank 0 */
468 return 0;
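/* A small worked example of the ranking above (illustrative; assume the
   parameters a and b were given pre-setup rank 1):

     t1 = a + b;    rank (t1) = MAX (1, 1) + 1 = 2
     t2 = t1 * a;   rank (t2) = MAX (2, 1) + 1 = 3
     t3 = t2 + 5;   rank (t3) = MAX (3, 0) + 1 = 4   (constants are rank 0)

   so operands defined deeper in a dependence chain sort earlier in the
   rank-decreasing order used by sort_by_operand_rank below.  */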
472 /* We want integer ones to end up last no matter what, since they are
473 the ones we can do the most with. */
474 #define INTEGER_CONST_TYPE (1 << 4)
475 #define FLOAT_ONE_CONST_TYPE (1 << 3)
476 #define FLOAT_CONST_TYPE (1 << 2)
477 #define OTHER_CONST_TYPE (1 << 1)
479 /* Classify an invariant tree into integer, float, or other, so that
480 we can sort them to be near other constants of the same type. */
481 static inline int
482 constant_type (tree t)
484 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
485 return INTEGER_CONST_TYPE;
486 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
488 /* Sort -1.0 and 1.0 constants last: while in some cases
489 const_binop can't optimize inexact operations, multiplication
490 by -1.0 or 1.0 can always be merged with others. */
491 if (real_onep (t) || real_minus_onep (t))
492 return FLOAT_ONE_CONST_TYPE;
493 return FLOAT_CONST_TYPE;
495 else
496 return OTHER_CONST_TYPE;
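/* With the comparison function below, rank-0 constants collect at the
   tail of the sorted operand vector and, among themselves, appear in
   the order OTHER_CONST_TYPE, FLOAT_CONST_TYPE, FLOAT_ONE_CONST_TYPE,
   INTEGER_CONST_TYPE, so e.g. in a floating-point chain the operands
   2.5 and 1.0 end up last and in that order (a rough illustration).  */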
499 /* qsort comparison function to sort operand entries PA and PB by rank
500 so that the sorted array is ordered by rank in decreasing order. */
501 static int
502 sort_by_operand_rank (const void *pa, const void *pb)
504 const operand_entry *oea = *(const operand_entry *const *)pa;
505 const operand_entry *oeb = *(const operand_entry *const *)pb;
507 if (oeb->rank != oea->rank)
508 return oeb->rank > oea->rank ? 1 : -1;
510 /* It's nicer for optimize_expression if constants that are likely
511 to fold when added/multiplied/whatever are put next to each
512 other. Since all constants have rank 0, order them by type. */
513 if (oea->rank == 0)
515 if (constant_type (oeb->op) != constant_type (oea->op))
516 return constant_type (oea->op) - constant_type (oeb->op);
517 else
518 /* To make sorting result stable, we use unique IDs to determine
519 order. */
520 return oeb->id > oea->id ? 1 : -1;
523 if (TREE_CODE (oea->op) != SSA_NAME)
525 if (TREE_CODE (oeb->op) != SSA_NAME)
526 return oeb->id > oea->id ? 1 : -1;
527 else
528 return 1;
530 else if (TREE_CODE (oeb->op) != SSA_NAME)
531 return -1;
533 /* Lastly, make sure the versions that are the same go next to each
534 other. */
535 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
537 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
538 versions of removed SSA_NAMEs, so if possible, prefer to sort
539 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
540 See PR60418. */
541 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
542 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
543 basic_block bba = gimple_bb (stmta);
544 basic_block bbb = gimple_bb (stmtb);
545 if (bbb != bba)
547 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
548 but the other might not. */
549 if (!bba)
550 return 1;
551 if (!bbb)
552 return -1;
553 /* If neither is, compare bb_rank. */
554 if (bb_rank[bbb->index] != bb_rank[bba->index])
555 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
558 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
559 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
560 if (da != db)
561 return da ? 1 : -1;
563 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
566 return oeb->id > oea->id ? 1 : -1;
569 /* Add an operand entry to *OPS for the tree operand OP. */
571 static void
572 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
574 operand_entry *oe = operand_entry_pool.allocate ();
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = 1;
580 oe->stmt_to_insert = stmt_to_insert;
581 ops->safe_push (oe);
584 /* Add an operand entry to *OPS for the tree operand OP with repeat
585 count REPEAT. */
587 static void
588 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
589 HOST_WIDE_INT repeat)
591 operand_entry *oe = operand_entry_pool.allocate ();
593 oe->op = op;
594 oe->rank = get_rank (op);
595 oe->id = next_operand_entry_id++;
596 oe->count = repeat;
597 oe->stmt_to_insert = NULL;
598 ops->safe_push (oe);
600 reassociate_stats.pows_encountered++;
603 /* Return true if STMT is a reassociable operation containing a binary
604 operation with tree code CODE, and is inside LOOP. */
606 static bool
607 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
609 basic_block bb = gimple_bb (stmt);
611 if (gimple_bb (stmt) == NULL)
612 return false;
614 if (!flow_bb_inside_loop_p (loop, bb))
615 return false;
617 if (is_gimple_assign (stmt)
618 && gimple_assign_rhs_code (stmt) == code
619 && has_single_use (gimple_assign_lhs (stmt)))
621 tree rhs1 = gimple_assign_rhs1 (stmt);
622 tree rhs2 = gimple_assign_rhs2 (stmt);
623 if (TREE_CODE (rhs1) == SSA_NAME
624 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
625 return false;
626 if (rhs2
627 && TREE_CODE (rhs2) == SSA_NAME
628 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
629 return false;
630 return true;
633 return false;
637 /* Return true if STMT is a nop-conversion. */
639 static bool
640 gimple_nop_conversion_p (gimple *stmt)
642 if (gassign *ass = dyn_cast <gassign *> (stmt))
644 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
645 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
646 TREE_TYPE (gimple_assign_rhs1 (ass))))
647 return true;
649 return false;
652 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
653 operand of that operation. Otherwise, return NULL_TREE. */
655 static tree
656 get_unary_op (tree name, enum tree_code opcode)
658 gimple *stmt = SSA_NAME_DEF_STMT (name);
660 /* Look through nop conversions (sign changes). */
661 if (gimple_nop_conversion_p (stmt)
662 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
663 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
665 if (!is_gimple_assign (stmt))
666 return NULL_TREE;
668 if (gimple_assign_rhs_code (stmt) == opcode)
669 return gimple_assign_rhs1 (stmt);
670 return NULL_TREE;
673 /* Return true if OP1 and OP2 have the same value if cast to either type. */
675 static bool
676 ops_equal_values_p (tree op1, tree op2)
678 if (op1 == op2)
679 return true;
681 tree orig_op1 = op1;
682 if (TREE_CODE (op1) == SSA_NAME)
684 gimple *stmt = SSA_NAME_DEF_STMT (op1);
685 if (gimple_nop_conversion_p (stmt))
687 op1 = gimple_assign_rhs1 (stmt);
688 if (op1 == op2)
689 return true;
693 if (TREE_CODE (op2) == SSA_NAME)
695 gimple *stmt = SSA_NAME_DEF_STMT (op2);
696 if (gimple_nop_conversion_p (stmt))
698 op2 = gimple_assign_rhs1 (stmt);
699 if (op1 == op2
700 || orig_op1 == op2)
701 return true;
705 return false;
709 /* If CURR and LAST are a pair of ops that OPCODE allows us to
710 eliminate through equivalences, do so, remove them from OPS, and
711 return true. Otherwise, return false. */
713 static bool
714 eliminate_duplicate_pair (enum tree_code opcode,
715 vec<operand_entry *> *ops,
716 bool *all_done,
717 unsigned int i,
718 operand_entry *curr,
719 operand_entry *last)
722 /* If we have two of the same op, and the opcode is &, |, min, or max,
723 we can eliminate one of them.
724 If we have two of the same op, and the opcode is ^, we can
725 eliminate both of them. */
727 if (last && last->op == curr->op)
729 switch (opcode)
731 case MAX_EXPR:
732 case MIN_EXPR:
733 case BIT_IOR_EXPR:
734 case BIT_AND_EXPR:
735 if (dump_file && (dump_flags & TDF_DETAILS))
737 fprintf (dump_file, "Equivalence: ");
738 print_generic_expr (dump_file, curr->op);
739 fprintf (dump_file, " [&|minmax] ");
740 print_generic_expr (dump_file, last->op);
741 fprintf (dump_file, " -> ");
742 print_generic_stmt (dump_file, last->op);
745 ops->ordered_remove (i);
746 reassociate_stats.ops_eliminated ++;
748 return true;
750 case BIT_XOR_EXPR:
751 if (dump_file && (dump_flags & TDF_DETAILS))
753 fprintf (dump_file, "Equivalence: ");
754 print_generic_expr (dump_file, curr->op);
755 fprintf (dump_file, " ^ ");
756 print_generic_expr (dump_file, last->op);
757 fprintf (dump_file, " -> nothing\n");
760 reassociate_stats.ops_eliminated += 2;
762 if (ops->length () == 2)
764 ops->truncate (0);
765 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
766 *all_done = true;
768 else
770 ops->ordered_remove (i-1);
771 ops->ordered_remove (i-1);
774 return true;
776 default:
777 break;
780 return false;
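/* For instance (illustrative): with opcode BIT_AND_EXPR and adjacent
   duplicate entries, {a, a, b} collapses to {a, b}; with BIT_XOR_EXPR
   and {a, a} both copies go away and the list becomes {0}, which also
   finishes the whole chain via *ALL_DONE.  */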
783 static vec<tree> plus_negates;
785 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
786 expression, look in OPS for a corresponding positive operation to cancel
787 it out. If we find one, remove the other from OPS, replace
788 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
789 return false. */
791 static bool
792 eliminate_plus_minus_pair (enum tree_code opcode,
793 vec<operand_entry *> *ops,
794 unsigned int currindex,
795 operand_entry *curr)
797 tree negateop;
798 tree notop;
799 unsigned int i;
800 operand_entry *oe;
802 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
803 return false;
805 negateop = get_unary_op (curr->op, NEGATE_EXPR);
806 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
807 if (negateop == NULL_TREE && notop == NULL_TREE)
808 return false;
810 /* Any non-negated version will have a rank that is one less than
811 the current rank. So once we hit those ranks, if we don't find
812 one, we can stop. */
814 for (i = currindex + 1;
815 ops->iterate (i, &oe)
816 && oe->rank >= curr->rank - 1 ;
817 i++)
819 if (negateop
820 && ops_equal_values_p (oe->op, negateop))
822 if (dump_file && (dump_flags & TDF_DETAILS))
824 fprintf (dump_file, "Equivalence: ");
825 print_generic_expr (dump_file, negateop);
826 fprintf (dump_file, " + -");
827 print_generic_expr (dump_file, oe->op);
828 fprintf (dump_file, " -> 0\n");
831 ops->ordered_remove (i);
832 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
833 ops->ordered_remove (currindex);
834 reassociate_stats.ops_eliminated ++;
836 return true;
838 else if (notop
839 && ops_equal_values_p (oe->op, notop))
841 tree op_type = TREE_TYPE (oe->op);
843 if (dump_file && (dump_flags & TDF_DETAILS))
845 fprintf (dump_file, "Equivalence: ");
846 print_generic_expr (dump_file, notop);
847 fprintf (dump_file, " + ~");
848 print_generic_expr (dump_file, oe->op);
849 fprintf (dump_file, " -> -1\n");
852 ops->ordered_remove (i);
853 add_to_ops_vec (ops, build_all_ones_cst (op_type));
854 ops->ordered_remove (currindex);
855 reassociate_stats.ops_eliminated ++;
857 return true;
861 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
862 save it for later inspection in repropagate_negates(). */
863 if (negateop != NULL_TREE
864 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
865 plus_negates.safe_push (curr->op);
867 return false;
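/* For instance (illustrative): in a PLUS_EXPR operand list containing t
   and x where t = -x, the pair is replaced by a single 0 entry; where
   t = ~x it is replaced by -1, matching x + ~x == -1 for two's
   complement integers.  */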
870 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
871 bitwise not expression, look in OPS for a corresponding operand to
872 cancel it out. If we find one, remove the other from OPS, replace
873 OPS[CURRINDEX] with 0, and return true. Otherwise, return
874 false. */
876 static bool
877 eliminate_not_pairs (enum tree_code opcode,
878 vec<operand_entry *> *ops,
879 unsigned int currindex,
880 operand_entry *curr)
882 tree notop;
883 unsigned int i;
884 operand_entry *oe;
886 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
887 || TREE_CODE (curr->op) != SSA_NAME)
888 return false;
890 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
891 if (notop == NULL_TREE)
892 return false;
894 /* Any non-not version will have a rank that is one less than
895 the current rank. So once we hit those ranks, if we don't find
896 one, we can stop. */
898 for (i = currindex + 1;
899 ops->iterate (i, &oe)
900 && oe->rank >= curr->rank - 1;
901 i++)
903 if (oe->op == notop)
905 if (dump_file && (dump_flags & TDF_DETAILS))
907 fprintf (dump_file, "Equivalence: ");
908 print_generic_expr (dump_file, notop);
909 if (opcode == BIT_AND_EXPR)
910 fprintf (dump_file, " & ~");
911 else if (opcode == BIT_IOR_EXPR)
912 fprintf (dump_file, " | ~");
913 print_generic_expr (dump_file, oe->op);
914 if (opcode == BIT_AND_EXPR)
915 fprintf (dump_file, " -> 0\n");
916 else if (opcode == BIT_IOR_EXPR)
917 fprintf (dump_file, " -> -1\n");
920 if (opcode == BIT_AND_EXPR)
921 oe->op = build_zero_cst (TREE_TYPE (oe->op));
922 else if (opcode == BIT_IOR_EXPR)
923 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
925 reassociate_stats.ops_eliminated += ops->length () - 1;
926 ops->truncate (0);
927 ops->quick_push (oe);
928 return true;
932 return false;
935 /* Use constant value that may be present in OPS to try to eliminate
936 operands. Note that this function is only really used when we've
937 eliminated ops for other reasons, or merged constants. Across
938 single statements, fold already does all of this, plus more. There
939 is little point in duplicating logic, so I've only included the
940 identities that I could ever construct testcases to trigger. */
942 static void
943 eliminate_using_constants (enum tree_code opcode,
944 vec<operand_entry *> *ops)
946 operand_entry *oelast = ops->last ();
947 tree type = TREE_TYPE (oelast->op);
949 if (oelast->rank == 0
950 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
952 switch (opcode)
954 case BIT_AND_EXPR:
955 if (integer_zerop (oelast->op))
957 if (ops->length () != 1)
959 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Found & 0, removing all other ops\n");
962 reassociate_stats.ops_eliminated += ops->length () - 1;
964 ops->truncate (0);
965 ops->quick_push (oelast);
966 return;
969 else if (integer_all_onesp (oelast->op))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found & -1, removing\n");
975 ops->pop ();
976 reassociate_stats.ops_eliminated++;
979 break;
980 case BIT_IOR_EXPR:
981 if (integer_all_onesp (oelast->op))
983 if (ops->length () != 1)
985 if (dump_file && (dump_flags & TDF_DETAILS))
986 fprintf (dump_file, "Found | -1, removing all other ops\n");
988 reassociate_stats.ops_eliminated += ops->length () - 1;
990 ops->truncate (0);
991 ops->quick_push (oelast);
992 return;
995 else if (integer_zerop (oelast->op))
997 if (ops->length () != 1)
999 if (dump_file && (dump_flags & TDF_DETAILS))
1000 fprintf (dump_file, "Found | 0, removing\n");
1001 ops->pop ();
1002 reassociate_stats.ops_eliminated++;
1005 break;
1006 case MULT_EXPR:
1007 if (integer_zerop (oelast->op)
1008 || (FLOAT_TYPE_P (type)
1009 && !HONOR_NANS (type)
1010 && !HONOR_SIGNED_ZEROS (type)
1011 && real_zerop (oelast->op)))
1013 if (ops->length () != 1)
1015 if (dump_file && (dump_flags & TDF_DETAILS))
1016 fprintf (dump_file, "Found * 0, removing all other ops\n");
1018 reassociate_stats.ops_eliminated += ops->length () - 1;
1019 ops->truncate (0);
1020 ops->quick_push (oelast);
1021 return;
1024 else if (integer_onep (oelast->op)
1025 || (FLOAT_TYPE_P (type)
1026 && !HONOR_SNANS (type)
1027 && real_onep (oelast->op)))
1029 if (ops->length () != 1)
1031 if (dump_file && (dump_flags & TDF_DETAILS))
1032 fprintf (dump_file, "Found * 1, removing\n");
1033 ops->pop ();
1034 reassociate_stats.ops_eliminated++;
1035 return;
1038 break;
1039 case BIT_XOR_EXPR:
1040 case PLUS_EXPR:
1041 case MINUS_EXPR:
1042 if (integer_zerop (oelast->op)
1043 || (FLOAT_TYPE_P (type)
1044 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1045 && fold_real_zero_addition_p (type, oelast->op,
1046 opcode == MINUS_EXPR)))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found [|^+] 0, removing\n");
1052 ops->pop ();
1053 reassociate_stats.ops_eliminated++;
1054 return;
1057 break;
1058 default:
1059 break;
1065 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1066 bool, bool);
1068 /* Structure for tracking and counting operands. */
1069 struct oecount {
1070 unsigned int cnt;
1071 unsigned int id;
1072 enum tree_code oecode;
1073 tree op;
1077 /* The heap for the oecount hashtable and the sorted list of operands. */
1078 static vec<oecount> cvec;
1081 /* Oecount hashtable helpers. */
1083 struct oecount_hasher : int_hash <int, 0, 1>
1085 static inline hashval_t hash (int);
1086 static inline bool equal (int, int);
1089 /* Hash function for oecount. */
1091 inline hashval_t
1092 oecount_hasher::hash (int p)
1094 const oecount *c = &cvec[p - 42];
1095 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1098 /* Comparison function for oecount. */
1100 inline bool
1101 oecount_hasher::equal (int p1, int p2)
1103 const oecount *c1 = &cvec[p1 - 42];
1104 const oecount *c2 = &cvec[p2 - 42];
1105 return c1->oecode == c2->oecode && c1->op == c2->op;
1108 /* Comparison function for qsort sorting oecount elements by count. */
1110 static int
1111 oecount_cmp (const void *p1, const void *p2)
1113 const oecount *c1 = (const oecount *)p1;
1114 const oecount *c2 = (const oecount *)p2;
1115 if (c1->cnt != c2->cnt)
1116 return c1->cnt > c2->cnt ? 1 : -1;
1117 else
1118 /* If counts are identical, use unique IDs to stabilize qsort. */
1119 return c1->id > c2->id ? 1 : -1;
1122 /* Return TRUE iff STMT represents a builtin call that raises OP
1123 to some exponent. */
1125 static bool
1126 stmt_is_power_of_op (gimple *stmt, tree op)
1128 if (!is_gimple_call (stmt))
1129 return false;
1131 switch (gimple_call_combined_fn (stmt))
1133 CASE_CFN_POW:
1134 CASE_CFN_POWI:
1135 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1137 default:
1138 return false;
1142 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1143 in place and return the result. Assumes that stmt_is_power_of_op
1144 was previously called for STMT and returned TRUE. */
1146 static HOST_WIDE_INT
1147 decrement_power (gimple *stmt)
1149 REAL_VALUE_TYPE c, cint;
1150 HOST_WIDE_INT power;
1151 tree arg1;
1153 switch (gimple_call_combined_fn (stmt))
1155 CASE_CFN_POW:
1156 arg1 = gimple_call_arg (stmt, 1);
1157 c = TREE_REAL_CST (arg1);
1158 power = real_to_integer (&c) - 1;
1159 real_from_integer (&cint, VOIDmode, power, SIGNED);
1160 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1161 return power;
1163 CASE_CFN_POWI:
1164 arg1 = gimple_call_arg (stmt, 1);
1165 power = TREE_INT_CST_LOW (arg1) - 1;
1166 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1167 return power;
1169 default:
1170 gcc_unreachable ();
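/* For instance (illustrative): for t = __builtin_powi (x, 3) the call
   is rewritten in place to __builtin_powi (x, 2) and 2 is returned;
   for t = __builtin_pow (x, 4.0) it becomes __builtin_pow (x, 3.0)
   and the return value is 3.  */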
1174 /* Replace the SSA name defined by STMT with a fresh SSA name and replace
1175 all uses of the old one. Return the new SSA name. */
1177 static tree
1178 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1180 gimple *use_stmt;
1181 use_operand_p use;
1182 imm_use_iterator iter;
1183 tree new_lhs, new_debug_lhs = NULL_TREE;
1184 tree lhs = gimple_get_lhs (stmt);
1186 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1187 gimple_set_lhs (stmt, new_lhs);
1189 /* Also need to update GIMPLE_DEBUGs. */
1190 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1192 tree repl = new_lhs;
1193 if (is_gimple_debug (use_stmt))
1195 if (new_debug_lhs == NULL_TREE)
1197 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1198 gdebug *def_temp
1199 = gimple_build_debug_bind (new_debug_lhs,
1200 build2 (opcode, TREE_TYPE (lhs),
1201 new_lhs, op),
1202 stmt);
1203 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1204 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1205 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1206 gimple_set_uid (def_temp, gimple_uid (stmt));
1207 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1208 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1210 repl = new_debug_lhs;
1212 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1213 SET_USE (use, repl);
1214 update_stmt (use_stmt);
1216 return new_lhs;
1219 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1220 uses with new SSA names. Also do this for the stmt that defines DEF
1221 if *DEF is not OP. */
1223 static void
1224 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1225 vec<gimple *> &stmts_to_fix)
1227 unsigned i;
1228 gimple *stmt;
1230 if (*def != op
1231 && TREE_CODE (*def) == SSA_NAME
1232 && (stmt = SSA_NAME_DEF_STMT (*def))
1233 && gimple_code (stmt) != GIMPLE_NOP)
1234 *def = make_new_ssa_for_def (stmt, opcode, op);
1236 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1237 make_new_ssa_for_def (stmt, opcode, op);
1240 /* Find the single immediate use of STMT's LHS, and replace it
1241 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1242 replace *DEF with OP as well. */
1244 static void
1245 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1247 tree lhs;
1248 gimple *use_stmt;
1249 use_operand_p use;
1250 gimple_stmt_iterator gsi;
1252 if (is_gimple_call (stmt))
1253 lhs = gimple_call_lhs (stmt);
1254 else
1255 lhs = gimple_assign_lhs (stmt);
1257 gcc_assert (has_single_use (lhs));
1258 single_imm_use (lhs, &use, &use_stmt);
1259 if (lhs == *def)
1260 *def = op;
1261 SET_USE (use, op);
1262 if (TREE_CODE (op) != SSA_NAME)
1263 update_stmt (use_stmt);
1264 gsi = gsi_for_stmt (stmt);
1265 unlink_stmt_vdef (stmt);
1266 reassoc_remove_stmt (&gsi);
1267 release_defs (stmt);
1270 /* Walks the linear chain with result *DEF searching for an operation
1271 with operand OP and code OPCODE removing that from the chain. *DEF
1272 is updated if there is only one operand but no operation left. */
1274 static void
1275 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1277 tree orig_def = *def;
1278 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1279 /* PR72835 - Record the stmt chain that has to be updated such that
1280 we don't use the same LHS when the values computed are different. */
1281 auto_vec<gimple *, 64> stmts_to_fix;
1285 tree name;
1287 if (opcode == MULT_EXPR)
1289 if (stmt_is_power_of_op (stmt, op))
1291 if (decrement_power (stmt) == 1)
1293 if (stmts_to_fix.length () > 0)
1294 stmts_to_fix.pop ();
1295 propagate_op_to_single_use (op, stmt, def);
1297 break;
1299 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1301 if (gimple_assign_rhs1 (stmt) == op)
1303 tree cst = build_minus_one_cst (TREE_TYPE (op));
1304 if (stmts_to_fix.length () > 0)
1305 stmts_to_fix.pop ();
1306 propagate_op_to_single_use (cst, stmt, def);
1307 break;
1309 else if (integer_minus_onep (op)
1310 || real_minus_onep (op))
1312 gimple_assign_set_rhs_code
1313 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1314 break;
1319 name = gimple_assign_rhs1 (stmt);
1321 /* If this is the operation we look for and one of the operands
1322 is ours simply propagate the other operand into the stmts
1323 single use. */
1324 if (gimple_assign_rhs_code (stmt) == opcode
1325 && (name == op
1326 || gimple_assign_rhs2 (stmt) == op))
1328 if (name == op)
1329 name = gimple_assign_rhs2 (stmt);
1330 if (stmts_to_fix.length () > 0)
1331 stmts_to_fix.pop ();
1332 propagate_op_to_single_use (name, stmt, def);
1333 break;
1336 /* We might have a multiply of two __builtin_pow* calls, and
1337 the operand might be hiding in the rightmost one. Likewise
1338 this can happen for a negate. */
1339 if (opcode == MULT_EXPR
1340 && gimple_assign_rhs_code (stmt) == opcode
1341 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1342 && has_single_use (gimple_assign_rhs2 (stmt)))
1344 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1345 if (stmt_is_power_of_op (stmt2, op))
1347 if (decrement_power (stmt2) == 1)
1348 propagate_op_to_single_use (op, stmt2, def);
1349 else
1350 stmts_to_fix.safe_push (stmt2);
1351 break;
1353 else if (is_gimple_assign (stmt2)
1354 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1356 if (gimple_assign_rhs1 (stmt2) == op)
1358 tree cst = build_minus_one_cst (TREE_TYPE (op));
1359 propagate_op_to_single_use (cst, stmt2, def);
1360 break;
1362 else if (integer_minus_onep (op)
1363 || real_minus_onep (op))
1365 stmts_to_fix.safe_push (stmt2);
1366 gimple_assign_set_rhs_code
1367 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1368 break;
1373 /* Continue walking the chain. */
1374 gcc_assert (name != op
1375 && TREE_CODE (name) == SSA_NAME);
1376 stmt = SSA_NAME_DEF_STMT (name);
1377 stmts_to_fix.safe_push (stmt);
1379 while (1);
1381 if (stmts_to_fix.length () > 0 || *def == orig_def)
1382 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
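/* A rough illustration of the walk above: with the single-use chain

     t1 = x * y;  t2 = t1 * z;  t3 = t2 * w;

   zero_one_operation (&t3, MULT_EXPR, y) walks t3 -> t2 -> t1, finds y
   as an operand of t1, forwards x into t1's only use, and then gives
   the remaining statements fresh LHS names (see PR72835), so the chain
   now computes x * z * w.  */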
1385 /* Returns true if statement S1 dominates statement S2. Like
1386 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1388 static bool
1389 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1391 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1393 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1394 SSA_NAME. Assume it lives at the beginning of function and
1395 thus dominates everything. */
1396 if (!bb1 || s1 == s2)
1397 return true;
1399 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1400 if (!bb2)
1401 return false;
1403 if (bb1 == bb2)
1405 /* PHIs in the same basic block are assumed to be
1406 executed all in parallel; if only one stmt is a PHI,
1407 it dominates the other stmt in the same basic block. */
1408 if (gimple_code (s1) == GIMPLE_PHI)
1409 return true;
1411 if (gimple_code (s2) == GIMPLE_PHI)
1412 return false;
1414 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1416 if (gimple_uid (s1) < gimple_uid (s2))
1417 return true;
1419 if (gimple_uid (s1) > gimple_uid (s2))
1420 return false;
1422 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1423 unsigned int uid = gimple_uid (s1);
1424 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1426 gimple *s = gsi_stmt (gsi);
1427 if (gimple_uid (s) != uid)
1428 break;
1429 if (s == s2)
1430 return true;
1433 return false;
1436 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1439 /* Insert STMT after INSERT_POINT. */
1441 static void
1442 insert_stmt_after (gimple *stmt, gimple *insert_point)
1444 gimple_stmt_iterator gsi;
1445 basic_block bb;
1447 if (gimple_code (insert_point) == GIMPLE_PHI)
1448 bb = gimple_bb (insert_point);
1449 else if (!stmt_ends_bb_p (insert_point))
1451 gsi = gsi_for_stmt (insert_point);
1452 gimple_set_uid (stmt, gimple_uid (insert_point));
1453 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1454 return;
1456 else
1457 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1458 thus if it must end a basic block, it should be a call that can
1459 throw, or some assignment that can throw. If it throws, the LHS
1460 of it will not be initialized though, so only valid places using
1461 the SSA_NAME should be dominated by the fallthru edge. */
1462 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1463 gsi = gsi_after_labels (bb);
1464 if (gsi_end_p (gsi))
1466 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1467 gimple_set_uid (stmt,
1468 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1470 else
1471 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1472 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1475 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1476 the result. Places the statement after the definition of either
1477 OP1 or OP2. Returns the new statement. */
1479 static gimple *
1480 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1482 gimple *op1def = NULL, *op2def = NULL;
1483 gimple_stmt_iterator gsi;
1484 tree op;
1485 gassign *sum;
1487 /* Create the addition statement. */
1488 op = make_ssa_name (type);
1489 sum = gimple_build_assign (op, opcode, op1, op2);
1491 /* Find an insertion place and insert. */
1492 if (TREE_CODE (op1) == SSA_NAME)
1493 op1def = SSA_NAME_DEF_STMT (op1);
1494 if (TREE_CODE (op2) == SSA_NAME)
1495 op2def = SSA_NAME_DEF_STMT (op2);
1496 if ((!op1def || gimple_nop_p (op1def))
1497 && (!op2def || gimple_nop_p (op2def)))
1499 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1500 if (gsi_end_p (gsi))
1502 gimple_stmt_iterator gsi2
1503 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1504 gimple_set_uid (sum,
1505 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1507 else
1508 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1509 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1511 else
1513 gimple *insert_point;
1514 if ((!op1def || gimple_nop_p (op1def))
1515 || (op2def && !gimple_nop_p (op2def)
1516 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1517 insert_point = op2def;
1518 else
1519 insert_point = op1def;
1520 insert_stmt_after (sum, insert_point);
1522 update_stmt (sum);
1524 return sum;
1527 /* Perform un-distribution of divisions and multiplications.
1528 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1529 to (A + B) / X for real X.
1531 The algorithm is organized as follows.
1533 - First we walk the addition chain *OPS looking for summands that
1534 are defined by a multiplication or a real division. This results
1535 in the candidates bitmap with relevant indices into *OPS.
1537 - Second we build the chains of multiplications or divisions for
1538 these candidates, counting the number of occurrences of (operand, code)
1539 pairs in all of the candidates chains.
1541 - Third we sort the (operand, code) pairs by number of occurrence and
1542 process them starting with the pair with the most uses.
1544 * For each such pair we walk the candidates again to build a
1545 second candidate bitmap noting all multiplication/division chains
1546 that have at least one occurrence of (operand, code).
1548 * We build an alternate addition chain only covering these
1549 candidates with one (operand, code) operation removed from their
1550 multiplication/division chain.
1552 * The first candidate gets replaced by the alternate addition chain
1553 multiplied/divided by the operand.
1555 * All candidate chains get disabled for further processing and
1556 processing of (operand, code) pairs continues.
1558 The alternate addition chains built are re-processed by the main
1559 reassociation algorithm which allows optimizing a * x * y + b * y * x
1560 to (a + b) * x * y in one invocation of the reassociation pass. */
1562 static bool
1563 undistribute_ops_list (enum tree_code opcode,
1564 vec<operand_entry *> *ops, class loop *loop)
1566 unsigned int length = ops->length ();
1567 operand_entry *oe1;
1568 unsigned i, j;
1569 unsigned nr_candidates, nr_candidates2;
1570 sbitmap_iterator sbi0;
1571 vec<operand_entry *> *subops;
1572 bool changed = false;
1573 unsigned int next_oecount_id = 0;
1575 if (length <= 1
1576 || opcode != PLUS_EXPR)
1577 return false;
1579 /* Build a list of candidates to process. */
1580 auto_sbitmap candidates (length);
1581 bitmap_clear (candidates);
1582 nr_candidates = 0;
1583 FOR_EACH_VEC_ELT (*ops, i, oe1)
1585 enum tree_code dcode;
1586 gimple *oe1def;
1588 if (TREE_CODE (oe1->op) != SSA_NAME)
1589 continue;
1590 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1591 if (!is_gimple_assign (oe1def))
1592 continue;
1593 dcode = gimple_assign_rhs_code (oe1def);
1594 if ((dcode != MULT_EXPR
1595 && dcode != RDIV_EXPR)
1596 || !is_reassociable_op (oe1def, dcode, loop))
1597 continue;
1599 bitmap_set_bit (candidates, i);
1600 nr_candidates++;
1603 if (nr_candidates < 2)
1604 return false;
1606 if (dump_file && (dump_flags & TDF_DETAILS))
1608 fprintf (dump_file, "searching for un-distribute opportunities ");
1609 print_generic_expr (dump_file,
1610 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1611 fprintf (dump_file, " %d\n", nr_candidates);
1614 /* Build linearized sub-operand lists and the counting table. */
1615 cvec.create (0);
1617 hash_table<oecount_hasher> ctable (15);
1619 /* ??? Macro arguments cannot have multi-argument template types in
1620 them. This typedef is needed to work around that limitation. */
1621 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1622 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1623 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1625 gimple *oedef;
1626 enum tree_code oecode;
1627 unsigned j;
1629 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1630 oecode = gimple_assign_rhs_code (oedef);
1631 linearize_expr_tree (&subops[i], oedef,
1632 associative_tree_code (oecode), false);
1634 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1636 oecount c;
1637 int *slot;
1638 int idx;
1639 c.oecode = oecode;
1640 c.cnt = 1;
1641 c.id = next_oecount_id++;
1642 c.op = oe1->op;
1643 cvec.safe_push (c);
1644 idx = cvec.length () + 41;
1645 slot = ctable.find_slot (idx, INSERT);
1646 if (!*slot)
1648 *slot = idx;
1650 else
1652 cvec.pop ();
1653 cvec[*slot - 42].cnt++;
1658 /* Sort the counting table. */
1659 cvec.qsort (oecount_cmp);
1661 if (dump_file && (dump_flags & TDF_DETAILS))
1663 oecount *c;
1664 fprintf (dump_file, "Candidates:\n");
1665 FOR_EACH_VEC_ELT (cvec, j, c)
1667 fprintf (dump_file, " %u %s: ", c->cnt,
1668 c->oecode == MULT_EXPR
1669 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1670 print_generic_expr (dump_file, c->op);
1671 fprintf (dump_file, "\n");
1675 /* Process the (operand, code) pairs in order of most occurrence. */
1676 auto_sbitmap candidates2 (length);
1677 while (!cvec.is_empty ())
1679 oecount *c = &cvec.last ();
1680 if (c->cnt < 2)
1681 break;
1683 /* Now collect the operands in the outer chain that contain
1684 the common operand in their inner chain. */
1685 bitmap_clear (candidates2);
1686 nr_candidates2 = 0;
1687 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1689 gimple *oedef;
1690 enum tree_code oecode;
1691 unsigned j;
1692 tree op = (*ops)[i]->op;
1694 /* If we undistributed in this chain already this may be
1695 a constant. */
1696 if (TREE_CODE (op) != SSA_NAME)
1697 continue;
1699 oedef = SSA_NAME_DEF_STMT (op);
1700 oecode = gimple_assign_rhs_code (oedef);
1701 if (oecode != c->oecode)
1702 continue;
1704 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1706 if (oe1->op == c->op)
1708 bitmap_set_bit (candidates2, i);
1709 ++nr_candidates2;
1710 break;
1715 if (nr_candidates2 >= 2)
1717 operand_entry *oe1, *oe2;
1718 gimple *prod;
1719 int first = bitmap_first_set_bit (candidates2);
1721 /* Build the new addition chain. */
1722 oe1 = (*ops)[first];
1723 if (dump_file && (dump_flags & TDF_DETAILS))
1725 fprintf (dump_file, "Building (");
1726 print_generic_expr (dump_file, oe1->op);
1728 zero_one_operation (&oe1->op, c->oecode, c->op);
1729 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1731 gimple *sum;
1732 oe2 = (*ops)[i];
1733 if (dump_file && (dump_flags & TDF_DETAILS))
1735 fprintf (dump_file, " + ");
1736 print_generic_expr (dump_file, oe2->op);
1738 zero_one_operation (&oe2->op, c->oecode, c->op);
1739 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1740 oe1->op, oe2->op, opcode);
1741 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1742 oe2->rank = 0;
1743 oe1->op = gimple_get_lhs (sum);
1746 /* Apply the multiplication/division. */
1747 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1748 oe1->op, c->op, c->oecode);
1749 if (dump_file && (dump_flags & TDF_DETAILS))
1751 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1752 print_generic_expr (dump_file, c->op);
1753 fprintf (dump_file, "\n");
1756 /* Record it in the addition chain and disable further
1757 undistribution with this op. */
1758 oe1->op = gimple_assign_lhs (prod);
1759 oe1->rank = get_rank (oe1->op);
1760 subops[first].release ();
1762 changed = true;
1765 cvec.pop ();
1768 for (i = 0; i < ops->length (); ++i)
1769 subops[i].release ();
1770 free (subops);
1771 cvec.release ();
1773 return changed;
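/* A rough illustration of the counting machinery above: for the
   addition chain {a*x, b*x, c*y} the (operand, code) table ends up with
   (x, *) counted twice and (a, *), (b, *), (c, *), (y, *) counted once.
   The (x, *) pair is processed first; the two candidates a*x and b*x
   have the x factor stripped by zero_one_operation, their remainders
   are summed, and the chain becomes {(a + b) * x, 0, c*y} for the rest
   of the pass to finish.  */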
1776 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1777 first: element index for each relevant BIT_FIELD_REF.
1778 second: the index into the *OPS vector for each relevant BIT_FIELD_REF. */
1779 typedef std::pair<unsigned, unsigned> v_info_elem;
1780 struct v_info {
1781 tree vec_type;
1782 auto_vec<v_info_elem, 32> vec;
1784 typedef v_info *v_info_ptr;
1786 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1787 static int
1788 sort_by_mach_mode (const void *p_i, const void *p_j)
1790 const tree tr1 = *((const tree *) p_i);
1791 const tree tr2 = *((const tree *) p_j);
1792 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1793 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1794 if (mode1 > mode2)
1795 return 1;
1796 else if (mode1 < mode2)
1797 return -1;
1798 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1799 return -1;
1800 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1801 return 1;
1802 return 0;
1805 /* Cleanup hash map for VECTOR information. */
1806 static void
1807 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1809 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1810 it != info_map.end (); ++it)
1812 v_info_ptr info = (*it).second;
1813 delete info;
1814 (*it).second = NULL;
1818 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1819 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1820 is transformed to
1821 Vs = (V1 + V2 + ... + Vn)
1822 Vs[0] + Vs[1] + ... + Vs[k]
1824 The basic steps are listed below:
1826 1) Check the addition chain *OPS, looking for summands that come from a
1827 BIT_FIELD_REF of a VECTOR-typed SSA_NAME. Record the information in
1828 v_info_map for each such summand, keyed by the VECTOR SSA_NAME.
1830 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1831 contiguous and cover the whole VECTOR without any holes. Collect the
1832 candidates that pass into one VECTOR list.
1834 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1835 group of candidates with the same mode, build the addition statements
1836 for them and generate BIT_FIELD_REFs accordingly.
1838 TODO:
1839 The current implementation requires each VECTOR to be fully covered,
1840 but it could be extended to handle partial coverage (adjacent elements
1841 that do not fill the whole vector); that would need some cost model to
1842 decide where to draw the boundary. */
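/* A rough illustration, assuming two V4SF vectors v1 and v2 whose four
   lanes each appear exactly once in a PLUS_EXPR chain:

     v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]

   becomes

     vs = v1 + v2;
     vs[0] + vs[1] + vs[2] + vs[3]

   so most of the scalar additions are replaced by one vector add.  */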
1844 static bool
1845 undistribute_bitref_for_vector (enum tree_code opcode,
1846 vec<operand_entry *> *ops, struct loop *loop)
1848 if (ops->length () <= 1)
1849 return false;
1851 if (opcode != PLUS_EXPR
1852 && opcode != MULT_EXPR
1853 && opcode != BIT_XOR_EXPR
1854 && opcode != BIT_IOR_EXPR
1855 && opcode != BIT_AND_EXPR)
1856 return false;
1858 hash_map<tree, v_info_ptr> v_info_map;
1859 operand_entry *oe1;
1860 unsigned i;
1862 /* Find the summands that come from a VECTOR BIT_FIELD_REF in the addition
1863 chain and put the information into the map. */
1864 FOR_EACH_VEC_ELT (*ops, i, oe1)
1866 enum tree_code dcode;
1867 gimple *oe1def;
1869 if (TREE_CODE (oe1->op) != SSA_NAME)
1870 continue;
1871 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1872 if (!is_gimple_assign (oe1def))
1873 continue;
1874 dcode = gimple_assign_rhs_code (oe1def);
1875 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1876 continue;
1878 tree rhs = gimple_assign_rhs1 (oe1def);
1879 tree vec = TREE_OPERAND (rhs, 0);
1880 tree vec_type = TREE_TYPE (vec);
1882 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1883 continue;
1885 /* Ignore it if target machine can't support this VECTOR type. */
1886 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1887 continue;
1889 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1890 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1891 continue;
1893 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1894 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1895 continue;
1897 /* The type of BIT_FIELD_REF might not be equal to the element type of
1898 the vector. We want to use a vector type with element type the
1899 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1900 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1902 machine_mode simd_mode;
1903 unsigned HOST_WIDE_INT size, nunits;
1904 unsigned HOST_WIDE_INT elem_size
1905 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1906 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1907 continue;
1908 if (size <= elem_size || (size % elem_size) != 0)
1909 continue;
1910 nunits = size / elem_size;
1911 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1912 nunits).exists (&simd_mode))
1913 continue;
1914 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1916 /* Ignore it if target machine can't support this VECTOR type. */
1917 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1918 continue;
1920 /* Check const vector type, constrain BIT_FIELD_REF offset and
1921 size. */
1922 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1923 continue;
1925 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1926 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1927 continue;
1930 tree elem_type = TREE_TYPE (vec_type);
1931 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1932 if (maybe_ne (bit_field_size (rhs), elem_size))
1933 continue;
1935 unsigned idx;
1936 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1937 continue;
1939 /* Ignore it if target machine can't support this type of VECTOR
1940 operation. */
1941 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1942 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1943 continue;
1945 bool existed;
1946 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1947 if (!existed)
1949 info = new v_info;
1950 info->vec_type = vec_type;
1952 else if (!types_compatible_p (vec_type, info->vec_type))
1953 continue;
1954 info->vec.safe_push (std::make_pair (idx, i));
1957 /* We need at least two VECTORs to combine. */
1958 if (v_info_map.elements () <= 1)
1960 cleanup_vinfo_map (v_info_map);
1961 return false;
1964 /* Verify all VECTOR candidates by checking two conditions:
1965 1) sorted offsets are adjacent, no holes.
1966 2) can fill the whole VECTOR perfectly.
1967 And add the valid candidates to a vector for further handling. */
1968 auto_vec<tree> valid_vecs (v_info_map.elements ());
1969 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1970 it != v_info_map.end (); ++it)
1972 tree cand_vec = (*it).first;
1973 v_info_ptr cand_info = (*it).second;
1974 unsigned int num_elems
1975 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1976 if (cand_info->vec.length () != num_elems)
1977 continue;
1978 sbitmap holes = sbitmap_alloc (num_elems);
1979 bitmap_ones (holes);
1980 bool valid = true;
1981 v_info_elem *curr;
1982 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1984 if (!bitmap_bit_p (holes, curr->first))
1986 valid = false;
1987 break;
1989 else
1990 bitmap_clear_bit (holes, curr->first);
1992 if (valid && bitmap_empty_p (holes))
1993 valid_vecs.quick_push (cand_vec);
1994 sbitmap_free (holes);
1997   /* We need at least two VECTORs to combine.  */
1998 if (valid_vecs.length () <= 1)
2000 cleanup_vinfo_map (v_info_map);
2001 return false;
2004 valid_vecs.qsort (sort_by_mach_mode);
2005   /* Go through all candidates in machine mode order and skip any mode
2006      that has only a single candidate.  */
2007 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2009 tree tvec = valid_vecs[i];
2010 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2012 /* Skip modes with only a single candidate. */
2013 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2014 continue;
2016 unsigned int idx, j;
2017 gimple *sum = NULL;
2018 tree sum_vec = tvec;
2019 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2020 v_info_elem *elem;
2021 tree vec_type = info_ptr->vec_type;
2023 /* Build the sum for all candidates with same mode. */
2026 sum = build_and_add_sum (vec_type, sum_vec,
2027 valid_vecs[i + 1], opcode);
2028 if (!useless_type_conversion_p (vec_type,
2029 TREE_TYPE (valid_vecs[i + 1])))
2031 /* Update the operands only after build_and_add_sum,
2032 so that we don't have to repeat the placement algorithm
2033 of build_and_add_sum. */
2034 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2035 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2036 valid_vecs[i + 1]);
2037 tree lhs = make_ssa_name (vec_type);
2038 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2039 gimple_set_uid (g, gimple_uid (sum));
2040 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2041 gimple_assign_set_rhs2 (sum, lhs);
2042 if (sum_vec == tvec)
2044 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2045 lhs = make_ssa_name (vec_type);
2046 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2047 gimple_set_uid (g, gimple_uid (sum));
2048 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2049 gimple_assign_set_rhs1 (sum, lhs);
2051 update_stmt (sum);
2053 sum_vec = gimple_get_lhs (sum);
2054 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2055 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2056       /* Update the related ops of the current candidate VECTOR.  */
2057 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2059 idx = elem->second;
2060 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2061           /* Set this so that the op definition will get DCEd later.  */
2062 gimple_set_visited (def, true);
2063 if (opcode == PLUS_EXPR
2064 || opcode == BIT_XOR_EXPR
2065 || opcode == BIT_IOR_EXPR)
2066 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2067 else if (opcode == MULT_EXPR)
2068 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2069 else
2071 gcc_assert (opcode == BIT_AND_EXPR);
2072 (*ops)[idx]->op
2073 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2075 (*ops)[idx]->rank = 0;
2077 if (dump_file && (dump_flags & TDF_DETAILS))
2079 fprintf (dump_file, "Generating addition -> ");
2080 print_gimple_stmt (dump_file, sum, 0);
2082 i++;
2084 while ((i < valid_vecs.length () - 1)
2085 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2087 /* Referring to first valid VECTOR with this mode, generate the
2088 BIT_FIELD_REF statements accordingly. */
2089 info_ptr = *(v_info_map.get (tvec));
2090 gcc_assert (sum);
2091 tree elem_type = TREE_TYPE (vec_type);
2092 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2094 idx = elem->second;
2095 tree dst = make_ssa_name (elem_type);
2096 tree pos = bitsize_int (elem->first
2097 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2098 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2099 TYPE_SIZE (elem_type), pos);
2100 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2101 insert_stmt_after (gs, sum);
2102 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2103       /* Set this so that the op definition will get DCEd later.  */
2104 gimple_set_visited (def, true);
2105 (*ops)[idx]->op = gimple_assign_lhs (gs);
2106 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2107 if (dump_file && (dump_flags & TDF_DETAILS))
2109 fprintf (dump_file, "Generating bit_field_ref -> ");
2110 print_gimple_stmt (dump_file, gs, 0);
2115 if (dump_file && (dump_flags & TDF_DETAILS))
2116     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2118 cleanup_vinfo_map (v_info_map);
2120 return true;
2123 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2124 expression, examine the other OPS to see if any of them are comparisons
2125 of the same values, which we may be able to combine or eliminate.
2126 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2128 static bool
2129 eliminate_redundant_comparison (enum tree_code opcode,
2130 vec<operand_entry *> *ops,
2131 unsigned int currindex,
2132 operand_entry *curr)
2134 tree op1, op2;
2135 enum tree_code lcode, rcode;
2136 gimple *def1, *def2;
2137 int i;
2138 operand_entry *oe;
2140 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2141 return false;
2143 /* Check that CURR is a comparison. */
2144 if (TREE_CODE (curr->op) != SSA_NAME)
2145 return false;
2146 def1 = SSA_NAME_DEF_STMT (curr->op);
2147 if (!is_gimple_assign (def1))
2148 return false;
2149 lcode = gimple_assign_rhs_code (def1);
2150 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2151 return false;
2152 op1 = gimple_assign_rhs1 (def1);
2153 op2 = gimple_assign_rhs2 (def1);
2155 /* Now look for a similar comparison in the remaining OPS. */
2156 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2158 tree t;
2160 if (TREE_CODE (oe->op) != SSA_NAME)
2161 continue;
2162 def2 = SSA_NAME_DEF_STMT (oe->op);
2163 if (!is_gimple_assign (def2))
2164 continue;
2165 rcode = gimple_assign_rhs_code (def2);
2166 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2167 continue;
2169 /* If we got here, we have a match. See if we can combine the
2170 two comparisons. */
2171 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2172 if (opcode == BIT_IOR_EXPR)
2173 t = maybe_fold_or_comparisons (type,
2174 lcode, op1, op2,
2175 rcode, gimple_assign_rhs1 (def2),
2176 gimple_assign_rhs2 (def2));
2177 else
2178 t = maybe_fold_and_comparisons (type,
2179 lcode, op1, op2,
2180 rcode, gimple_assign_rhs1 (def2),
2181 gimple_assign_rhs2 (def2));
2182 if (!t)
2183 continue;
2185 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2186 always give us a boolean_type_node value back. If the original
2187 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2188 we need to convert. */
2189 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2190 t = fold_convert (TREE_TYPE (curr->op), t);
2192 if (TREE_CODE (t) != INTEGER_CST
2193 && !operand_equal_p (t, curr->op, 0))
2195 enum tree_code subcode;
2196 tree newop1, newop2;
2197 if (!COMPARISON_CLASS_P (t))
2198 continue;
2199 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2200 STRIP_USELESS_TYPE_CONVERSION (newop1);
2201 STRIP_USELESS_TYPE_CONVERSION (newop2);
2202 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2203 continue;
2206 if (dump_file && (dump_flags & TDF_DETAILS))
2208 fprintf (dump_file, "Equivalence: ");
2209 print_generic_expr (dump_file, curr->op);
2210 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2211 print_generic_expr (dump_file, oe->op);
2212 fprintf (dump_file, " -> ");
2213 print_generic_expr (dump_file, t);
2214 fprintf (dump_file, "\n");
2217 /* Now we can delete oe, as it has been subsumed by the new combined
2218 expression t. */
2219 ops->ordered_remove (i);
2220 reassociate_stats.ops_eliminated ++;
2222 /* If t is the same as curr->op, we're done. Otherwise we must
2223 replace curr->op with t. Special case is if we got a constant
2224 back, in which case we add it to the end instead of in place of
2225 the current entry. */
2226 if (TREE_CODE (t) == INTEGER_CST)
2228 ops->ordered_remove (currindex);
2229 add_to_ops_vec (ops, t);
2231 else if (!operand_equal_p (t, curr->op, 0))
2233 gimple *sum;
2234 enum tree_code subcode;
2235 tree newop1;
2236 tree newop2;
2237 gcc_assert (COMPARISON_CLASS_P (t));
2238 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2239 STRIP_USELESS_TYPE_CONVERSION (newop1);
2240 STRIP_USELESS_TYPE_CONVERSION (newop2);
2241 gcc_checking_assert (is_gimple_val (newop1)
2242 && is_gimple_val (newop2));
2243 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2244 curr->op = gimple_get_lhs (sum);
2246 return true;
2249 return false;
2253 /* Transform repeated addition of the same value into a multiplication
2254    by a constant.  */
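/* Editorial sketch, not part of the original source: for an integral type,
   an operand list such as { x, x, x, y } under PLUS_EXPR is rewritten as
     tmp_1 = x * 3;
   with the three x entries removed and tmp_1 added to the list, leaving
   { tmp_1, y }.  For floating-point types this is only done when
   -funsafe-math-optimizations is in effect.  */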
2255 static bool
2256 transform_add_to_multiply (vec<operand_entry *> *ops)
2258 operand_entry *oe;
2259 tree op = NULL_TREE;
2260 int j;
2261 int i, start = -1, end = 0, count = 0;
2262 auto_vec<std::pair <int, int> > indxs;
2263 bool changed = false;
2265 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2266 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2267 || !flag_unsafe_math_optimizations))
2268 return false;
2270 /* Look for repeated operands. */
2271 FOR_EACH_VEC_ELT (*ops, i, oe)
2273 if (start == -1)
2275 count = 1;
2276 op = oe->op;
2277 start = i;
2279 else if (operand_equal_p (oe->op, op, 0))
2281 count++;
2282 end = i;
2284 else
2286 if (count > 1)
2287 indxs.safe_push (std::make_pair (start, end));
2288 count = 1;
2289 op = oe->op;
2290 start = i;
2294 if (count > 1)
2295 indxs.safe_push (std::make_pair (start, end));
2297 for (j = indxs.length () - 1; j >= 0; --j)
2299 /* Convert repeated operand addition to multiplication. */
2300 start = indxs[j].first;
2301 end = indxs[j].second;
2302 op = (*ops)[start]->op;
2303 count = end - start + 1;
2304 for (i = end; i >= start; --i)
2305 ops->unordered_remove (i);
2306 tree tmp = make_ssa_name (TREE_TYPE (op));
2307 tree cst = build_int_cst (integer_type_node, count);
2308 gassign *mul_stmt
2309 = gimple_build_assign (tmp, MULT_EXPR,
2310 op, fold_convert (TREE_TYPE (op), cst));
2311 gimple_set_visited (mul_stmt, true);
2312 add_to_ops_vec (ops, tmp, mul_stmt);
2313 changed = true;
2316 return changed;
2320 /* Perform various identities and other optimizations on the list of
2321 operand entries, stored in OPS. The tree code for the binary
2322 operation between all the operands is OPCODE. */
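/* Editorial sketch, not part of the original source: with OPCODE ==
   PLUS_EXPR, trailing constants are folded first, e.g. { a, b, 3, 5 }
   becomes { a, b, 8 }; afterwards the elimination helpers remove
   identities such as a + (-a), a & a or x | ~x from the list, iterating
   until no further change is made.  */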
2324 static void
2325 optimize_ops_list (enum tree_code opcode,
2326 vec<operand_entry *> *ops)
2328 unsigned int length = ops->length ();
2329 unsigned int i;
2330 operand_entry *oe;
2331 operand_entry *oelast = NULL;
2332 bool iterate = false;
2334 if (length == 1)
2335 return;
2337 oelast = ops->last ();
2339 /* If the last two are constants, pop the constants off, merge them
2340 and try the next two. */
2341 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2343 operand_entry *oelm1 = (*ops)[length - 2];
2345 if (oelm1->rank == 0
2346 && is_gimple_min_invariant (oelm1->op)
2347 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2348 TREE_TYPE (oelast->op)))
2350 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2351 oelm1->op, oelast->op);
2353 if (folded && is_gimple_min_invariant (folded))
2355 if (dump_file && (dump_flags & TDF_DETAILS))
2356 fprintf (dump_file, "Merging constants\n");
2358 ops->pop ();
2359 ops->pop ();
2361 add_to_ops_vec (ops, folded);
2362 reassociate_stats.constants_eliminated++;
2364 optimize_ops_list (opcode, ops);
2365 return;
2370 eliminate_using_constants (opcode, ops);
2371 oelast = NULL;
2373 for (i = 0; ops->iterate (i, &oe);)
2375 bool done = false;
2377 if (eliminate_not_pairs (opcode, ops, i, oe))
2378 return;
2379 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2380 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2381 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2383 if (done)
2384 return;
2385 iterate = true;
2386 oelast = NULL;
2387 continue;
2389 oelast = oe;
2390 i++;
2393 if (iterate)
2394 optimize_ops_list (opcode, ops);
2397 /* The following functions are subroutines to optimize_range_tests and allow
2398 it to try to change a logical combination of comparisons into a range
2399 test.
2401 For example, both
2402 X == 2 || X == 5 || X == 3 || X == 4
2404 X >= 2 && X <= 5
2405 are converted to
2406 (unsigned) (X - 2) <= 3
2408    For more information see the comments above fold_range_test in fold-const.c,
2409 this implementation is for GIMPLE. */
2411 struct range_entry
2413 tree exp;
2414 tree low;
2415 tree high;
2416 bool in_p;
2417 bool strict_overflow_p;
2418 unsigned int idx, next;
2421 void dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp);
2422 void debug_range_entry (struct range_entry *r);
2424 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2426 void
2427 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2429 if (!skip_exp)
2430 print_generic_expr (file, r->exp);
2431 fprintf (file, " %c[", r->in_p ? '+' : '-');
2432 print_generic_expr (file, r->low);
2433 fputs (", ", file);
2434 print_generic_expr (file, r->high);
2435 fputc (']', file);
2438 /* Dump the range entry R to STDERR. */
2440 DEBUG_FUNCTION void
2441 debug_range_entry (struct range_entry *r)
2443 dump_range_entry (stderr, r, false);
2444 fputc ('\n', stderr);
2447 /* This is similar to make_range in fold-const.c, but on top of
2448 GIMPLE instead of trees. If EXP is non-NULL, it should be
2449 an SSA_NAME and STMT argument is ignored, otherwise STMT
2450 argument should be a GIMPLE_COND. */
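/* Editorial example, not part of the original source: if EXP is an SSA
   name defined as
     _1 = x_2(D) > 5;
   for an integral x, the resulting range_entry describes x_2(D) with
   in_p = true, low = 6 and high = NULL_TREE, i.e. the range +[6, max].  */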
2452 static void
2453 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2455 int in_p;
2456 tree low, high;
2457 bool is_bool, strict_overflow_p;
2459 r->exp = NULL_TREE;
2460 r->in_p = false;
2461 r->strict_overflow_p = false;
2462 r->low = NULL_TREE;
2463 r->high = NULL_TREE;
2464 if (exp != NULL_TREE
2465 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2466 return;
2468 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2469 and see if we can refine the range. Some of the cases below may not
2470 happen, but it doesn't seem worth worrying about this. We "continue"
2471 the outer loop when we've changed something; otherwise we "break"
2472 the switch, which will "break" the while. */
2473 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2474 high = low;
2475 in_p = 0;
2476 strict_overflow_p = false;
2477 is_bool = false;
2478 if (exp == NULL_TREE)
2479 is_bool = true;
2480 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2482 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2483 is_bool = true;
2484 else
2485 return;
2487 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2488 is_bool = true;
2490 while (1)
2492 enum tree_code code;
2493 tree arg0, arg1, exp_type;
2494 tree nexp;
2495 location_t loc;
2497 if (exp != NULL_TREE)
2499 if (TREE_CODE (exp) != SSA_NAME
2500 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2501 break;
2503 stmt = SSA_NAME_DEF_STMT (exp);
2504 if (!is_gimple_assign (stmt))
2505 break;
2507 code = gimple_assign_rhs_code (stmt);
2508 arg0 = gimple_assign_rhs1 (stmt);
2509 arg1 = gimple_assign_rhs2 (stmt);
2510 exp_type = TREE_TYPE (exp);
2512 else
2514 code = gimple_cond_code (stmt);
2515 arg0 = gimple_cond_lhs (stmt);
2516 arg1 = gimple_cond_rhs (stmt);
2517 exp_type = boolean_type_node;
2520 if (TREE_CODE (arg0) != SSA_NAME
2521 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2522 break;
2523 loc = gimple_location (stmt);
2524 switch (code)
2526 case BIT_NOT_EXPR:
2527 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2528 /* Ensure the range is either +[-,0], +[0,0],
2529 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2530 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2531 or similar expression of unconditional true or
2532 false, it should not be negated. */
2533 && ((high && integer_zerop (high))
2534 || (low && integer_onep (low))))
2536 in_p = !in_p;
2537 exp = arg0;
2538 continue;
2540 break;
2541 case SSA_NAME:
2542 exp = arg0;
2543 continue;
2544 CASE_CONVERT:
2545 if (is_bool)
2547 if ((TYPE_PRECISION (exp_type) == 1
2548 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2549 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2550 return;
2552 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2554 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2555 is_bool = true;
2556 else
2557 return;
2559 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2560 is_bool = true;
2561 goto do_default;
2562 case EQ_EXPR:
2563 case NE_EXPR:
2564 case LT_EXPR:
2565 case LE_EXPR:
2566 case GE_EXPR:
2567 case GT_EXPR:
2568 is_bool = true;
2569 /* FALLTHRU */
2570 default:
2571 if (!is_bool)
2572 return;
2573 do_default:
2574 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2575 &low, &high, &in_p,
2576 &strict_overflow_p);
2577 if (nexp != NULL_TREE)
2579 exp = nexp;
2580 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2581 continue;
2583 break;
2585 break;
2587 if (is_bool)
2589 r->exp = exp;
2590 r->in_p = in_p;
2591 r->low = low;
2592 r->high = high;
2593 r->strict_overflow_p = strict_overflow_p;
2597 /* Comparison function for qsort. Sort entries
2598 without SSA_NAME exp first, then with SSA_NAMEs sorted
2599 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2600 by increasing ->low and if ->low is the same, by increasing
2601 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2602 maximum. */
2604 static int
2605 range_entry_cmp (const void *a, const void *b)
2607 const struct range_entry *p = (const struct range_entry *) a;
2608 const struct range_entry *q = (const struct range_entry *) b;
2610 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2612 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2614 /* Group range_entries for the same SSA_NAME together. */
2615 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2616 return -1;
2617 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2618 return 1;
2619 /* If ->low is different, NULL low goes first, then by
2620 ascending low. */
2621 if (p->low != NULL_TREE)
2623 if (q->low != NULL_TREE)
2625 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2626 p->low, q->low);
2627 if (tem && integer_onep (tem))
2628 return -1;
2629 tem = fold_binary (GT_EXPR, boolean_type_node,
2630 p->low, q->low);
2631 if (tem && integer_onep (tem))
2632 return 1;
2634 else
2635 return 1;
2637 else if (q->low != NULL_TREE)
2638 return -1;
2639 /* If ->high is different, NULL high goes last, before that by
2640 ascending high. */
2641 if (p->high != NULL_TREE)
2643 if (q->high != NULL_TREE)
2645 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2646 p->high, q->high);
2647 if (tem && integer_onep (tem))
2648 return -1;
2649 tem = fold_binary (GT_EXPR, boolean_type_node,
2650 p->high, q->high);
2651 if (tem && integer_onep (tem))
2652 return 1;
2654 else
2655 return -1;
2657 else if (q->high != NULL_TREE)
2658 return 1;
2659 /* If both ranges are the same, sort below by ascending idx. */
2661 else
2662 return 1;
2664 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2665 return -1;
2667 if (p->idx < q->idx)
2668 return -1;
2669 else
2671 gcc_checking_assert (p->idx > q->idx);
2672 return 1;
2676 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2677    inserting any needed statements before or after GSI, as selected by BEFORE.  */
2679 static tree
2680 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2682 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2683 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2684 if (TREE_CODE (ret) != SSA_NAME)
2686 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2687 if (before)
2688 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2689 else
2690 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2691 ret = gimple_assign_lhs (g);
2693 return ret;
2696 /* Helper routine of optimize_range_test.
2697 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2698 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2699 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2700    is NULL, OTHERRANGEP must not be NULL; it then points to
2701    an array of COUNT pointers to other ranges.  Return
2702 true if the range merge has been successful.
2703 If OPCODE is ERROR_MARK, this is called from within
2704 maybe_optimize_range_tests and is performing inter-bb range optimization.
2705 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2706 oe->rank. */
2708 static bool
2709 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2710 struct range_entry **otherrangep,
2711 unsigned int count, enum tree_code opcode,
2712 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2713 bool in_p, tree low, tree high, bool strict_overflow_p)
2715 operand_entry *oe = (*ops)[range->idx];
2716 tree op = oe->op;
2717 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2718 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2719 location_t loc = gimple_location (stmt);
2720 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2721 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2722 in_p, low, high);
2723 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2724 gimple_stmt_iterator gsi;
2725 unsigned int i, uid;
2727 if (tem == NULL_TREE)
2728 return false;
2730 /* If op is default def SSA_NAME, there is no place to insert the
2731 new comparison. Give up, unless we can use OP itself as the
2732 range test. */
2733 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2735 if (op == range->exp
2736 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2737 || TREE_CODE (optype) == BOOLEAN_TYPE)
2738 && (op == tem
2739 || (TREE_CODE (tem) == EQ_EXPR
2740 && TREE_OPERAND (tem, 0) == op
2741 && integer_onep (TREE_OPERAND (tem, 1))))
2742 && opcode != BIT_IOR_EXPR
2743 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2745 stmt = NULL;
2746 tem = op;
2748 else
2749 return false;
2752 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2753 warning_at (loc, OPT_Wstrict_overflow,
2754 "assuming signed overflow does not occur "
2755 "when simplifying range test");
2757 if (dump_file && (dump_flags & TDF_DETAILS))
2759 struct range_entry *r;
2760 fprintf (dump_file, "Optimizing range tests ");
2761 dump_range_entry (dump_file, range, false);
2762 for (i = 0; i < count; i++)
2764 if (otherrange)
2765 r = otherrange + i;
2766 else
2767 r = otherrangep[i];
2768 if (r->exp
2769 && r->exp != range->exp
2770 && TREE_CODE (r->exp) == SSA_NAME)
2772 fprintf (dump_file, " and ");
2773 dump_range_entry (dump_file, r, false);
2775 else
2777 fprintf (dump_file, " and");
2778 dump_range_entry (dump_file, r, true);
2781 fprintf (dump_file, "\n into ");
2782 print_generic_expr (dump_file, tem);
2783 fprintf (dump_file, "\n");
2786 if (opcode == BIT_IOR_EXPR
2787 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2788 tem = invert_truthvalue_loc (loc, tem);
2790 tem = fold_convert_loc (loc, optype, tem);
2791 if (stmt)
2793 gsi = gsi_for_stmt (stmt);
2794 uid = gimple_uid (stmt);
2796 else
2798 gsi = gsi_none ();
2799 uid = 0;
2801 if (stmt == NULL)
2802 gcc_checking_assert (tem == op);
2803 /* In rare cases range->exp can be equal to lhs of stmt.
2804      In that case we have to insert after the stmt rather than before
2805 it. If stmt is a PHI, insert it at the start of the basic block. */
2806 else if (op != range->exp)
2808 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2809 tem = force_into_ssa_name (&gsi, tem, true);
2810 gsi_prev (&gsi);
2812 else if (gimple_code (stmt) != GIMPLE_PHI)
2814 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2815 tem = force_into_ssa_name (&gsi, tem, false);
2817 else
2819 gsi = gsi_after_labels (gimple_bb (stmt));
2820 if (!gsi_end_p (gsi))
2821 uid = gimple_uid (gsi_stmt (gsi));
2822 else
2824 gsi = gsi_start_bb (gimple_bb (stmt));
2825 uid = 1;
2826 while (!gsi_end_p (gsi))
2828 uid = gimple_uid (gsi_stmt (gsi));
2829 gsi_next (&gsi);
2832 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2833 tem = force_into_ssa_name (&gsi, tem, true);
2834 if (gsi_end_p (gsi))
2835 gsi = gsi_last_bb (gimple_bb (stmt));
2836 else
2837 gsi_prev (&gsi);
2839 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2840 if (gimple_uid (gsi_stmt (gsi)))
2841 break;
2842 else
2843 gimple_set_uid (gsi_stmt (gsi), uid);
2845 oe->op = tem;
2846 range->exp = exp;
2847 range->low = low;
2848 range->high = high;
2849 range->in_p = in_p;
2850 range->strict_overflow_p = false;
2852 for (i = 0; i < count; i++)
2854 if (otherrange)
2855 range = otherrange + i;
2856 else
2857 range = otherrangep[i];
2858 oe = (*ops)[range->idx];
2859 /* Now change all the other range test immediate uses, so that
2860 those tests will be optimized away. */
2861 if (opcode == ERROR_MARK)
2863 if (oe->op)
2864 oe->op = build_int_cst (TREE_TYPE (oe->op),
2865 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2866 else
2867 oe->op = (oe->rank == BIT_IOR_EXPR
2868 ? boolean_false_node : boolean_true_node);
2870 else
2871 oe->op = error_mark_node;
2872 range->exp = NULL_TREE;
2873 range->low = NULL_TREE;
2874 range->high = NULL_TREE;
2876 return true;
2879 /* Optimize X == CST1 || X == CST2
2880 if popcount (CST1 ^ CST2) == 1 into
2881 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2882 Similarly for ranges. E.g.
2883 X != 2 && X != 3 && X != 10 && X != 11
2884 will be transformed by the previous optimization into
2885 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2886 and this loop can transform that into
2887 !(((X & ~8) - 2U) <= 1U). */
2889 static bool
2890 optimize_range_tests_xor (enum tree_code opcode, tree type,
2891 tree lowi, tree lowj, tree highi, tree highj,
2892 vec<operand_entry *> *ops,
2893 struct range_entry *rangei,
2894 struct range_entry *rangej)
2896 tree lowxor, highxor, tem, exp;
2897 /* Check lowi ^ lowj == highi ^ highj and
2898 popcount (lowi ^ lowj) == 1. */
2899 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2900 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2901 return false;
2902 if (!integer_pow2p (lowxor))
2903 return false;
2904 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2905 if (!tree_int_cst_equal (lowxor, highxor))
2906 return false;
2908 exp = rangei->exp;
2909 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2910 int prec = GET_MODE_PRECISION (mode);
2911 if (TYPE_PRECISION (type) < prec
2912 || (wi::to_wide (TYPE_MIN_VALUE (type))
2913 != wi::min_value (prec, TYPE_SIGN (type)))
2914 || (wi::to_wide (TYPE_MAX_VALUE (type))
2915 != wi::max_value (prec, TYPE_SIGN (type))))
2917 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2918 exp = fold_convert (type, exp);
2919 lowxor = fold_convert (type, lowxor);
2920 lowi = fold_convert (type, lowi);
2921 highi = fold_convert (type, highi);
2923 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2924 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2925 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2926 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2927 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2928 NULL, rangei->in_p, lowj, highj,
2929 rangei->strict_overflow_p
2930 || rangej->strict_overflow_p))
2931 return true;
2932 return false;
2935 /* Optimize X == CST1 || X == CST2
2936 if popcount (CST2 - CST1) == 1 into
2937 ((X - CST1) & ~(CST2 - CST1)) == 0.
2938 Similarly for ranges. E.g.
2939 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2940 || X == 75 || X == 45
2941 will be transformed by the previous optimization into
2942 (X - 43U) <= 3U || (X - 75U) <= 3U
2943 and this loop can transform that into
2944 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2945 static bool
2946 optimize_range_tests_diff (enum tree_code opcode, tree type,
2947 tree lowi, tree lowj, tree highi, tree highj,
2948 vec<operand_entry *> *ops,
2949 struct range_entry *rangei,
2950 struct range_entry *rangej)
2952 tree tem1, tem2, mask;
2953 /* Check highi - lowi == highj - lowj. */
2954 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2955 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2956 return false;
2957 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2958 if (!tree_int_cst_equal (tem1, tem2))
2959 return false;
2960 /* Check popcount (lowj - lowi) == 1. */
2961 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2962 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2963 return false;
2964 if (!integer_pow2p (tem1))
2965 return false;
2967 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2968 int prec = GET_MODE_PRECISION (mode);
2969 if (TYPE_PRECISION (type) < prec
2970 || (wi::to_wide (TYPE_MIN_VALUE (type))
2971 != wi::min_value (prec, TYPE_SIGN (type)))
2972 || (wi::to_wide (TYPE_MAX_VALUE (type))
2973 != wi::max_value (prec, TYPE_SIGN (type))))
2974 type = build_nonstandard_integer_type (prec, 1);
2975 else
2976 type = unsigned_type_for (type);
2977 tem1 = fold_convert (type, tem1);
2978 tem2 = fold_convert (type, tem2);
2979 lowi = fold_convert (type, lowi);
2980 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2981 tem1 = fold_build2 (MINUS_EXPR, type,
2982 fold_convert (type, rangei->exp), lowi);
2983 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2984 lowj = build_int_cst (type, 0);
2985 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2986 NULL, rangei->in_p, lowj, tem2,
2987 rangei->strict_overflow_p
2988 || rangej->strict_overflow_p))
2989 return true;
2990 return false;
2993 /* Perform the checks common to optimize_range_tests_xor and
2994 optimize_range_tests_diff.
2995 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2996 Else it calls optimize_range_tests_diff. */
2998 static bool
2999 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3000 bool optimize_xor, vec<operand_entry *> *ops,
3001 struct range_entry *ranges)
3003 int i, j;
3004 bool any_changes = false;
3005 for (i = first; i < length; i++)
3007 tree lowi, highi, lowj, highj, type, tem;
3009 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3010 continue;
3011 type = TREE_TYPE (ranges[i].exp);
3012 if (!INTEGRAL_TYPE_P (type))
3013 continue;
3014 lowi = ranges[i].low;
3015 if (lowi == NULL_TREE)
3016 lowi = TYPE_MIN_VALUE (type);
3017 highi = ranges[i].high;
3018 if (highi == NULL_TREE)
3019 continue;
3020 for (j = i + 1; j < length && j < i + 64; j++)
3022 bool changes;
3023 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3024 continue;
3025 lowj = ranges[j].low;
3026 if (lowj == NULL_TREE)
3027 continue;
3028 highj = ranges[j].high;
3029 if (highj == NULL_TREE)
3030 highj = TYPE_MAX_VALUE (type);
3031 /* Check lowj > highi. */
3032 tem = fold_binary (GT_EXPR, boolean_type_node,
3033 lowj, highi);
3034 if (tem == NULL_TREE || !integer_onep (tem))
3035 continue;
3036 if (optimize_xor)
3037 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3038 highi, highj, ops,
3039 ranges + i, ranges + j);
3040 else
3041 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3042 highi, highj, ops,
3043 ranges + i, ranges + j);
3044 if (changes)
3046 any_changes = true;
3047 break;
3051 return any_changes;
3054 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3055 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3056 EXP on success, NULL otherwise. */
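/* Editorial example, not part of the original source: with PREC == 64,
   TOTALLOW == 43, LOW == 75 and HIGH == 78 the range covers
   HIGH - LOW + 1 == 4 values, so the initial mask has bits 0..3 set;
   because TOTALLOW < LOW it is then shifted left by LOW - TOTALLOW == 32,
   yielding bits 32..35 as this range's contribution to the combined
   bit test.  */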
3058 static tree
3059 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3060 wide_int *mask, tree *totallowp)
3062 tree tem = int_const_binop (MINUS_EXPR, high, low);
3063 if (tem == NULL_TREE
3064 || TREE_CODE (tem) != INTEGER_CST
3065 || TREE_OVERFLOW (tem)
3066 || tree_int_cst_sgn (tem) == -1
3067 || compare_tree_int (tem, prec) != -1)
3068 return NULL_TREE;
3070 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3071 *mask = wi::shifted_mask (0, max, false, prec);
3072 if (TREE_CODE (exp) == BIT_AND_EXPR
3073 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3075 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3076 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3077 if (wi::popcount (msk) == 1
3078 && wi::ltu_p (msk, prec - max))
3080 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3081 max += msk.to_uhwi ();
3082 exp = TREE_OPERAND (exp, 0);
3083 if (integer_zerop (low)
3084 && TREE_CODE (exp) == PLUS_EXPR
3085 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3087 tree ret = TREE_OPERAND (exp, 0);
3088 STRIP_NOPS (ret);
3089 widest_int bias
3090 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3091 TYPE_PRECISION (TREE_TYPE (low))));
3092 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3093 if (totallowp)
3095 *totallowp = tbias;
3096 return ret;
3098 else if (!tree_int_cst_lt (totallow, tbias))
3099 return NULL_TREE;
3100 bias = wi::to_widest (tbias);
3101 bias -= wi::to_widest (totallow);
3102 if (bias >= 0 && bias < prec - max)
3104 *mask = wi::lshift (*mask, bias);
3105 return ret;
3110 if (totallowp)
3111 return exp;
3112 if (!tree_int_cst_lt (totallow, low))
3113 return exp;
3114 tem = int_const_binop (MINUS_EXPR, low, totallow);
3115 if (tem == NULL_TREE
3116 || TREE_CODE (tem) != INTEGER_CST
3117 || TREE_OVERFLOW (tem)
3118 || compare_tree_int (tem, prec - max) == 1)
3119 return NULL_TREE;
3121 *mask = wi::lshift (*mask, wi::to_widest (tem));
3122 return exp;
3125 /* Attempt to optimize small range tests using bit test.
3126 E.g.
3127 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3128 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3129    has been optimized by earlier passes into:
3130 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3131    As the whole 43 through 82 range spans fewer than 64 values,
3132 for 64-bit word targets optimize that into:
3133 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3135 static bool
3136 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3137 vec<operand_entry *> *ops,
3138 struct range_entry *ranges)
3140 int i, j;
3141 bool any_changes = false;
3142 int prec = GET_MODE_BITSIZE (word_mode);
3143 auto_vec<struct range_entry *, 64> candidates;
3145 for (i = first; i < length - 1; i++)
3147 tree lowi, highi, lowj, highj, type;
3149 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3150 continue;
3151 type = TREE_TYPE (ranges[i].exp);
3152 if (!INTEGRAL_TYPE_P (type))
3153 continue;
3154 lowi = ranges[i].low;
3155 if (lowi == NULL_TREE)
3156 lowi = TYPE_MIN_VALUE (type);
3157 highi = ranges[i].high;
3158 if (highi == NULL_TREE)
3159 continue;
3160 wide_int mask;
3161 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3162 highi, &mask, &lowi);
3163 if (exp == NULL_TREE)
3164 continue;
3165 bool strict_overflow_p = ranges[i].strict_overflow_p;
3166 candidates.truncate (0);
3167 int end = MIN (i + 64, length);
3168 for (j = i + 1; j < end; j++)
3170 tree exp2;
3171 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3172 continue;
3173 if (ranges[j].exp == exp)
3175 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3177 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3178 if (exp2 == exp)
3180 else if (TREE_CODE (exp2) == PLUS_EXPR)
3182 exp2 = TREE_OPERAND (exp2, 0);
3183 STRIP_NOPS (exp2);
3184 if (exp2 != exp)
3185 continue;
3187 else
3188 continue;
3190 else
3191 continue;
3192 lowj = ranges[j].low;
3193 if (lowj == NULL_TREE)
3194 continue;
3195 highj = ranges[j].high;
3196 if (highj == NULL_TREE)
3197 highj = TYPE_MAX_VALUE (type);
3198 wide_int mask2;
3199 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3200 highj, &mask2, NULL);
3201 if (exp2 != exp)
3202 continue;
3203 mask |= mask2;
3204 strict_overflow_p |= ranges[j].strict_overflow_p;
3205 candidates.safe_push (&ranges[j]);
3208 /* If every possible relative value of the expression is a valid shift
3209      amount, then we can merge the entry test into the bit test.  In that
3210      case use the bit test if we would otherwise need 2 or more comparisons;
3211      in the other cases the threshold is 3 comparisons.  */
3212 wide_int min, max;
3213 bool entry_test_needed;
3214 if (TREE_CODE (exp) == SSA_NAME
3215 && get_range_info (exp, &min, &max) == VR_RANGE
3216 && wi::leu_p (max - min, prec - 1))
3218 wide_int ilowi = wi::to_wide (lowi);
3219 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3221 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3222 mask = wi::lshift (mask, ilowi - min);
3224 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3226 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3227 mask = wi::lrshift (mask, min - ilowi);
3229 entry_test_needed = false;
3231 else
3232 entry_test_needed = true;
3233 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3235 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3236 wi::to_widest (lowi)
3237 + prec - 1 - wi::clz (mask));
3238 operand_entry *oe = (*ops)[ranges[i].idx];
3239 tree op = oe->op;
3240 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3241 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3242 location_t loc = gimple_location (stmt);
3243 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3245 /* See if it isn't cheaper to pretend the minimum value of the
3246         range is 0, if the maximum value is small enough.
3247         We can then avoid the subtraction of the minimum value, but the
3248         mask constant might be more expensive.  */
3249 if (compare_tree_int (lowi, 0) > 0
3250 && compare_tree_int (high, prec) < 0)
3252 int cost_diff;
3253 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3254 rtx reg = gen_raw_REG (word_mode, 10000);
3255 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3256 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3257 GEN_INT (-m)),
3258 word_mode, speed_p);
3259 rtx r = immed_wide_int_const (mask, word_mode);
3260 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3261 word_mode, speed_p);
3262 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3263 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3264 word_mode, speed_p);
3265 if (cost_diff > 0)
3267 mask = wi::lshift (mask, m);
3268 lowi = build_zero_cst (TREE_TYPE (lowi));
3272 tree tem;
3273 if (entry_test_needed)
3275 tem = build_range_check (loc, optype, unshare_expr (exp),
3276 false, lowi, high);
3277 if (tem == NULL_TREE || is_gimple_val (tem))
3278 continue;
3280 else
3281 tem = NULL_TREE;
3282 tree etype = unsigned_type_for (TREE_TYPE (exp));
3283 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3284 fold_convert_loc (loc, etype, exp),
3285 fold_convert_loc (loc, etype, lowi));
3286 exp = fold_convert_loc (loc, integer_type_node, exp);
3287 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3288 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3289 build_int_cst (word_type, 1), exp);
3290 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3291 wide_int_to_tree (word_type, mask));
3292 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3293 build_zero_cst (word_type));
3294 if (is_gimple_val (exp))
3295 continue;
3297 /* The shift might have undefined behavior if TEM is true,
3298 but reassociate_bb isn't prepared to have basic blocks
3299          split when it is running.  So, temporarily emit code
3300 with BIT_IOR_EXPR instead of &&, and fix it up in
3301 branch_fixup. */
3302 gimple_seq seq = NULL;
3303 if (tem)
3305 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3306 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3307 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3309 gimple_seq seq2;
3310 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3311 gimple_seq_add_seq_without_update (&seq, seq2);
3312 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3313 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3314 if (tem)
3316 gimple *g = gimple_build_assign (make_ssa_name (optype),
3317 BIT_IOR_EXPR, tem, exp);
3318 gimple_set_location (g, loc);
3319 gimple_seq_add_stmt_without_update (&seq, g);
3320 exp = gimple_assign_lhs (g);
3322 tree val = build_zero_cst (optype);
3323 if (update_range_test (&ranges[i], NULL, candidates.address (),
3324 candidates.length (), opcode, ops, exp,
3325 seq, false, val, val, strict_overflow_p))
3327 any_changes = true;
3328 if (tem)
3329 reassoc_branch_fixups.safe_push (tem);
3331 else
3332 gimple_seq_discard (seq);
3335 return any_changes;
3338 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3339    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */
3341 static bool
3342 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3343 vec<operand_entry *> *ops,
3344 struct range_entry *ranges)
3346 int i;
3347 unsigned int b;
3348 bool any_changes = false;
3349 auto_vec<int, 128> buckets;
3350 auto_vec<int, 32> chains;
3351 auto_vec<struct range_entry *, 32> candidates;
3353 for (i = first; i < length; i++)
3355 if (ranges[i].exp == NULL_TREE
3356 || TREE_CODE (ranges[i].exp) != SSA_NAME
3357 || !ranges[i].in_p
3358 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3359 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3360 || ranges[i].low == NULL_TREE
3361 || ranges[i].low != ranges[i].high)
3362 continue;
3364 bool zero_p = integer_zerop (ranges[i].low);
3365 if (!zero_p && !integer_all_onesp (ranges[i].low))
3366 continue;
3368 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3369 if (buckets.length () <= b)
3370 buckets.safe_grow_cleared (b + 1, true);
3371 if (chains.length () <= (unsigned) i)
3372 chains.safe_grow (i + 1, true);
3373 chains[i] = buckets[b];
3374 buckets[b] = i + 1;
3377 FOR_EACH_VEC_ELT (buckets, b, i)
3378 if (i && chains[i - 1])
3380 int j, k = i;
3381 for (j = chains[i - 1]; j; j = chains[j - 1])
3383 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3384 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3385 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3386 k = j;
3388 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3389 tree type2 = NULL_TREE;
3390 bool strict_overflow_p = false;
3391 candidates.truncate (0);
3392 for (j = i; j; j = chains[j - 1])
3394 tree type = TREE_TYPE (ranges[j - 1].exp);
3395 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3396 if (j == k
3397 || useless_type_conversion_p (type1, type))
3399 else if (type2 == NULL_TREE
3400 || useless_type_conversion_p (type2, type))
3402 if (type2 == NULL_TREE)
3403 type2 = type;
3404 candidates.safe_push (&ranges[j - 1]);
3407 unsigned l = candidates.length ();
3408 for (j = i; j; j = chains[j - 1])
3410 tree type = TREE_TYPE (ranges[j - 1].exp);
3411 if (j == k)
3412 continue;
3413 if (useless_type_conversion_p (type1, type))
3415 else if (type2 == NULL_TREE
3416 || useless_type_conversion_p (type2, type))
3417 continue;
3418 candidates.safe_push (&ranges[j - 1]);
3420 gimple_seq seq = NULL;
3421 tree op = NULL_TREE;
3422 unsigned int id;
3423 struct range_entry *r;
3424 candidates.safe_push (&ranges[k - 1]);
3425 FOR_EACH_VEC_ELT (candidates, id, r)
3427 gimple *g;
3428 if (id == 0)
3430 op = r->exp;
3431 continue;
3433 if (id == l)
3435 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3436 gimple_seq_add_stmt_without_update (&seq, g);
3437 op = gimple_assign_lhs (g);
3439 tree type = TREE_TYPE (r->exp);
3440 tree exp = r->exp;
3441 if (id >= l && !useless_type_conversion_p (type1, type))
3443 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3444 gimple_seq_add_stmt_without_update (&seq, g);
3445 exp = gimple_assign_lhs (g);
3447 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3448 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3449 op, exp);
3450 gimple_seq_add_stmt_without_update (&seq, g);
3451 op = gimple_assign_lhs (g);
3453 candidates.pop ();
3454 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3455 candidates.length (), opcode, ops, op,
3456 seq, true, ranges[k - 1].low,
3457 ranges[k - 1].low, strict_overflow_p))
3458 any_changes = true;
3459 else
3460 gimple_seq_discard (seq);
3463 return any_changes;
3466 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3467 a >= 0 && a < b into (unsigned) a < (unsigned) b
3468 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3470 static bool
3471 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3472 vec<operand_entry *> *ops,
3473 struct range_entry *ranges,
3474 basic_block first_bb)
3476 int i;
3477 bool any_changes = false;
3478 hash_map<tree, int> *map = NULL;
3480 for (i = first; i < length; i++)
3482 if (ranges[i].exp == NULL_TREE
3483 || TREE_CODE (ranges[i].exp) != SSA_NAME
3484 || !ranges[i].in_p)
3485 continue;
3487 tree type = TREE_TYPE (ranges[i].exp);
3488 if (!INTEGRAL_TYPE_P (type)
3489 || TYPE_UNSIGNED (type)
3490 || ranges[i].low == NULL_TREE
3491 || !integer_zerop (ranges[i].low)
3492 || ranges[i].high != NULL_TREE)
3493 continue;
3494 /* EXP >= 0 here. */
3495 if (map == NULL)
3496 map = new hash_map <tree, int>;
3497 map->put (ranges[i].exp, i);
3500 if (map == NULL)
3501 return false;
3503 for (i = 0; i < length; i++)
3505 bool in_p = ranges[i].in_p;
3506 if (ranges[i].low == NULL_TREE
3507 || ranges[i].high == NULL_TREE)
3508 continue;
3509 if (!integer_zerop (ranges[i].low)
3510 || !integer_zerop (ranges[i].high))
3512 if (ranges[i].exp
3513 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3514 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3515 && integer_onep (ranges[i].low)
3516 && integer_onep (ranges[i].high))
3517 in_p = !in_p;
3518 else
3519 continue;
3522 gimple *stmt;
3523 tree_code ccode;
3524 tree rhs1, rhs2;
3525 if (ranges[i].exp)
3527 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3528 continue;
3529 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3530 if (!is_gimple_assign (stmt))
3531 continue;
3532 ccode = gimple_assign_rhs_code (stmt);
3533 rhs1 = gimple_assign_rhs1 (stmt);
3534 rhs2 = gimple_assign_rhs2 (stmt);
3536 else
3538 operand_entry *oe = (*ops)[ranges[i].idx];
3539 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3540 if (gimple_code (stmt) != GIMPLE_COND)
3541 continue;
3542 ccode = gimple_cond_code (stmt);
3543 rhs1 = gimple_cond_lhs (stmt);
3544 rhs2 = gimple_cond_rhs (stmt);
3547 if (TREE_CODE (rhs1) != SSA_NAME
3548 || rhs2 == NULL_TREE
3549 || TREE_CODE (rhs2) != SSA_NAME)
3550 continue;
3552 switch (ccode)
3554 case GT_EXPR:
3555 case GE_EXPR:
3556 case LT_EXPR:
3557 case LE_EXPR:
3558 break;
3559 default:
3560 continue;
3562 if (in_p)
3563 ccode = invert_tree_comparison (ccode, false);
3564 switch (ccode)
3566 case GT_EXPR:
3567 case GE_EXPR:
3568 std::swap (rhs1, rhs2);
3569 ccode = swap_tree_comparison (ccode);
3570 break;
3571 case LT_EXPR:
3572 case LE_EXPR:
3573 break;
3574 default:
3575 gcc_unreachable ();
3578 int *idx = map->get (rhs1);
3579 if (idx == NULL)
3580 continue;
3582 /* maybe_optimize_range_tests allows statements without side-effects
3583 in the basic blocks as long as they are consumed in the same bb.
3584 Make sure rhs2's def stmt is not among them, otherwise we can't
3585          safely use get_nonzero_bits on it.  E.g. in:
3586 # RANGE [-83, 1] NONZERO 173
3587 # k_32 = PHI <k_47(13), k_12(9)>
3589 if (k_32 >= 0)
3590 goto <bb 5>; [26.46%]
3591 else
3592 goto <bb 9>; [73.54%]
3594 <bb 5> [local count: 140323371]:
3595 # RANGE [0, 1] NONZERO 1
3596 _5 = (int) k_32;
3597 # RANGE [0, 4] NONZERO 4
3598 _21 = _5 << 2;
3599 # RANGE [0, 4] NONZERO 4
3600 iftmp.0_44 = (char) _21;
3601 if (k_32 < iftmp.0_44)
3602 goto <bb 6>; [84.48%]
3603 else
3604 goto <bb 9>; [15.52%]
3605          the ranges on _5/_21/iftmp.0_44 are flow sensitive; they assume that
3606 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3607 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3608          those stmts even for negative k_32 and the value ranges would no
3609          longer be guaranteed and so the optimization would be invalid.  */
3610 while (opcode == ERROR_MARK)
3612 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3613 basic_block bb2 = gimple_bb (g);
3614 if (bb2
3615 && bb2 != first_bb
3616 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3618 /* As an exception, handle a few common cases. */
3619 if (gimple_assign_cast_p (g)
3620 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3622 tree op0 = gimple_assign_rhs1 (g);
3623 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3624 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3625 > TYPE_PRECISION (TREE_TYPE (op0))))
3626 /* Zero-extension is always ok. */
3627 break;
3628 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3629 == TYPE_PRECISION (TREE_TYPE (op0))
3630 && TREE_CODE (op0) == SSA_NAME)
3632 /* Cast from signed to unsigned or vice versa. Retry
3633 with the op0 as new rhs2. */
3634 rhs2 = op0;
3635 continue;
3638 else if (is_gimple_assign (g)
3639 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3640 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3641 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3642 /* Masking with INTEGER_CST with MSB clear is always ok
3643 too. */
3644 break;
3645 rhs2 = NULL_TREE;
3647 break;
3649 if (rhs2 == NULL_TREE)
3650 continue;
3652 wide_int nz = get_nonzero_bits (rhs2);
3653 if (wi::neg_p (nz))
3654 continue;
3656 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3657          and RHS2 is known to be >= 0.  */
3658 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3660 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3661 if ((ranges[*idx].strict_overflow_p
3662 || ranges[i].strict_overflow_p)
3663 && issue_strict_overflow_warning (wc))
3664 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3665 "assuming signed overflow does not occur "
3666 "when simplifying range test");
3668 if (dump_file && (dump_flags & TDF_DETAILS))
3670 struct range_entry *r = &ranges[*idx];
3671 fprintf (dump_file, "Optimizing range test ");
3672 print_generic_expr (dump_file, r->exp);
3673 fprintf (dump_file, " +[");
3674 print_generic_expr (dump_file, r->low);
3675 fprintf (dump_file, ", ");
3676 print_generic_expr (dump_file, r->high);
3677 fprintf (dump_file, "] and comparison ");
3678 print_generic_expr (dump_file, rhs1);
3679 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3680 print_generic_expr (dump_file, rhs2);
3681 fprintf (dump_file, "\n into (");
3682 print_generic_expr (dump_file, utype);
3683 fprintf (dump_file, ") ");
3684 print_generic_expr (dump_file, rhs1);
3685 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3686 print_generic_expr (dump_file, utype);
3687 fprintf (dump_file, ") ");
3688 print_generic_expr (dump_file, rhs2);
3689 fprintf (dump_file, "\n");
3692 operand_entry *oe = (*ops)[ranges[i].idx];
3693 ranges[i].in_p = 0;
3694 if (opcode == BIT_IOR_EXPR
3695 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3697 ranges[i].in_p = 1;
3698 ccode = invert_tree_comparison (ccode, false);
3701 unsigned int uid = gimple_uid (stmt);
3702 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3703 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3704 gimple_set_uid (g, uid);
3705 rhs1 = gimple_assign_lhs (g);
3706 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3707 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3709 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3710 gimple_set_uid (g, uid);
3711 rhs2 = gimple_assign_lhs (g);
3712 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3714 if (tree_swap_operands_p (rhs1, rhs2))
3716 std::swap (rhs1, rhs2);
3717 ccode = swap_tree_comparison (ccode);
3719 if (gimple_code (stmt) == GIMPLE_COND)
3721 gcond *c = as_a <gcond *> (stmt);
3722 gimple_cond_set_code (c, ccode);
3723 gimple_cond_set_lhs (c, rhs1);
3724 gimple_cond_set_rhs (c, rhs2);
3725 update_stmt (stmt);
3727 else
3729 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3730 if (!INTEGRAL_TYPE_P (ctype)
3731 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3732 && TYPE_PRECISION (ctype) != 1))
3733 ctype = boolean_type_node;
3734 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3735 gimple_set_uid (g, uid);
3736 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3737 if (oe->op && ctype != TREE_TYPE (oe->op))
3739 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3740 NOP_EXPR, gimple_assign_lhs (g));
3741 gimple_set_uid (g, uid);
3742 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3744 ranges[i].exp = gimple_assign_lhs (g);
3745 oe->op = ranges[i].exp;
3746 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3747 ranges[i].high = ranges[i].low;
3749 ranges[i].strict_overflow_p = false;
3750 oe = (*ops)[ranges[*idx].idx];
3751 /* Now change all the other range test immediate uses, so that
3752 those tests will be optimized away. */
3753 if (opcode == ERROR_MARK)
3755 if (oe->op)
3756 oe->op = build_int_cst (TREE_TYPE (oe->op),
3757 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3758 else
3759 oe->op = (oe->rank == BIT_IOR_EXPR
3760 ? boolean_false_node : boolean_true_node);
3762 else
3763 oe->op = error_mark_node;
3764 ranges[*idx].exp = NULL_TREE;
3765 ranges[*idx].low = NULL_TREE;
3766 ranges[*idx].high = NULL_TREE;
3767 any_changes = true;
3770 delete map;
3771 return any_changes;
3774 /* Optimize range tests, similarly to how fold_range_test optimizes
3775    them on trees.  The tree code for the binary
3776 operation between all the operands is OPCODE.
3777 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3778 maybe_optimize_range_tests for inter-bb range optimization.
3779 In that case if oe->op is NULL, oe->id is bb->index whose
3780 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3781 the actual opcode.
3782 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3784 static bool
3785 optimize_range_tests (enum tree_code opcode,
3786 vec<operand_entry *> *ops, basic_block first_bb)
3788 unsigned int length = ops->length (), i, j, first;
3789 operand_entry *oe;
3790 struct range_entry *ranges;
3791 bool any_changes = false;
3793 if (length == 1)
3794 return false;
3796 ranges = XNEWVEC (struct range_entry, length);
3797 for (i = 0; i < length; i++)
3799 oe = (*ops)[i];
3800 ranges[i].idx = i;
3801 init_range_entry (ranges + i, oe->op,
3802 oe->op
3803 ? NULL
3804 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3805 /* For | invert it now, we will invert it again before emitting
3806 the optimized expression. */
3807 if (opcode == BIT_IOR_EXPR
3808 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3809 ranges[i].in_p = !ranges[i].in_p;
3812 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3813 for (i = 0; i < length; i++)
3814 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3815 break;
3817 /* Try to merge ranges. */
3818 for (first = i; i < length; i++)
3820 tree low = ranges[i].low;
3821 tree high = ranges[i].high;
3822 int in_p = ranges[i].in_p;
3823 bool strict_overflow_p = ranges[i].strict_overflow_p;
3824 int update_fail_count = 0;
3826 for (j = i + 1; j < length; j++)
3828 if (ranges[i].exp != ranges[j].exp)
3829 break;
3830 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3831 ranges[j].in_p, ranges[j].low, ranges[j].high))
3832 break;
3833 strict_overflow_p |= ranges[j].strict_overflow_p;
3836 if (j == i + 1)
3837 continue;
3839 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3840 opcode, ops, ranges[i].exp, NULL, in_p,
3841 low, high, strict_overflow_p))
3843 i = j - 1;
3844 any_changes = true;
3846 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3847 while update_range_test would fail. */
3848 else if (update_fail_count == 64)
3849 i = j - 1;
3850 else
3851 ++update_fail_count;
3854 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3855 ops, ranges);
3857 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3858 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3859 ops, ranges);
3860 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3861 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3862 ops, ranges);
3863 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3864 ops, ranges);
3865 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3866 ranges, first_bb);
3868 if (any_changes && opcode != ERROR_MARK)
3870 j = 0;
3871 FOR_EACH_VEC_ELT (*ops, i, oe)
3873 if (oe->op == error_mark_node)
3874 continue;
3875 else if (i != j)
3876 (*ops)[j] = oe;
3877 j++;
3879 ops->truncate (j);
3882 XDELETEVEC (ranges);
3883 return any_changes;
3886 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3887 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3888 otherwise the comparison code. TYPE is a return value that is set
3889    to the type of the comparison.  */
3891 static tree_code
3892 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
3893 tree *lhs, tree *rhs, gassign **vcond)
3895 if (TREE_CODE (var) != SSA_NAME)
3896 return ERROR_MARK;
3898 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3899 if (stmt == NULL)
3900 return ERROR_MARK;
3901 if (vcond)
3902 *vcond = stmt;
3904 /* ??? If we start creating more COND_EXPR, we could perform
3905 this same optimization with them. For now, simplify. */
3906 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3907 return ERROR_MARK;
3909 tree cond = gimple_assign_rhs1 (stmt);
3910 tree_code cmp = TREE_CODE (cond);
3911 if (cmp != SSA_NAME)
3912 return ERROR_MARK;
3914 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
3915 if (assign == NULL
3916 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
3917 return ERROR_MARK;
3919 cmp = gimple_assign_rhs_code (assign);
3920 if (lhs)
3921 *lhs = gimple_assign_rhs1 (assign);
3922 if (rhs)
3923 *rhs = gimple_assign_rhs2 (assign);
3925 /* ??? For now, allow only canonical true and false result vectors.
3926 We could expand this to other constants should the need arise,
3927 but at the moment we don't create them. */
3928 tree t = gimple_assign_rhs2 (stmt);
3929 tree f = gimple_assign_rhs3 (stmt);
3930 bool inv;
3931 if (integer_all_onesp (t))
3932 inv = false;
3933 else if (integer_all_onesp (f))
3935 cmp = invert_tree_comparison (cmp, false);
3936 inv = true;
3938 else
3939 return ERROR_MARK;
3940 if (!integer_zerop (f))
3941 return ERROR_MARK;
3943 /* Success! */
3944 if (rets)
3945 *rets = assign;
3946 if (reti)
3947 *reti = inv;
3948 if (type)
3949 *type = TREE_TYPE (cond);
3950 return cmp;
3953 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3954 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
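/* An illustrative example (assumed SSA names): two operands such as
     v_1 = VEC_COND_EXPR <a_2 < b_3, { -1, ... }, { 0, ... }>;
     v_4 = VEC_COND_EXPR <a_2 == b_3, { -1, ... }, { 0, ... }>;
   combined by BIT_IOR_EXPR may be folded by maybe_fold_or_comparisons
   into the single comparison a_2 <= b_3, which then replaces the
   condition of the first VEC_COND_EXPR while the second operand is
   dropped from OPS.  */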
3956 static bool
3957 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3959 unsigned int length = ops->length (), i, j;
3960 bool any_changes = false;
3962 if (length == 1)
3963 return false;
3965 for (i = 0; i < length; ++i)
3967 tree elt0 = (*ops)[i]->op;
3969 gassign *stmt0, *vcond0;
3970 bool invert;
3971 tree type, lhs0, rhs0;
3972 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
3973 &rhs0, &vcond0);
3974 if (cmp0 == ERROR_MARK)
3975 continue;
3977 for (j = i + 1; j < length; ++j)
3979 tree &elt1 = (*ops)[j]->op;
3981 gassign *stmt1, *vcond1;
3982 tree lhs1, rhs1;
3983 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
3984 &rhs1, &vcond1);
3985 if (cmp1 == ERROR_MARK)
3986 continue;
3988 tree comb;
3989 if (opcode == BIT_AND_EXPR)
3990 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
3991 cmp1, lhs1, rhs1);
3992 else if (opcode == BIT_IOR_EXPR)
3993 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
3994 cmp1, lhs1, rhs1);
3995 else
3996 gcc_unreachable ();
3997 if (comb == NULL)
3998 continue;
4000 /* Success! */
4001 if (dump_file && (dump_flags & TDF_DETAILS))
4003 fprintf (dump_file, "Transforming ");
4004 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4005 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4006 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4007 fprintf (dump_file, " into ");
4008 print_generic_expr (dump_file, comb);
4009 fputc ('\n', dump_file);
4012 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4013 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4014 true, GSI_SAME_STMT);
4015 if (invert)
4016 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4017 gimple_assign_rhs3_ptr (vcond0));
4018 gimple_assign_set_rhs1 (vcond0, exp);
4019 update_stmt (vcond0);
4021 elt1 = error_mark_node;
4022 any_changes = true;
4026 if (any_changes)
4028 operand_entry *oe;
4029 j = 0;
4030 FOR_EACH_VEC_ELT (*ops, i, oe)
4032 if (oe->op == error_mark_node)
4033 continue;
4034 else if (i != j)
4035 (*ops)[j] = oe;
4036 j++;
4038 ops->truncate (j);
4041 return any_changes;
4044 /* Return true if STMT is a cast like:
4045 <bb N>:
4047 _123 = (int) _234;
4049 <bb M>:
4050 # _345 = PHI <_123(N), 1(...), 1(...)>
4051 where _234 has bool type, _123 has a single use and
4052 bb N has a single successor M. This is commonly used in
4053 the last block of a range test.
4055 Also return true if STMT is a tcc_comparison like:
4056 <bb N>:
4058 _234 = a_2(D) == 2;
4060 <bb M>:
4061 # _345 = PHI <_234(N), 1(...), 1(...)>
4062 _346 = (int) _345;
4063 where _234 has bool type, a single use and
4064 bb N has a single successor M. This is commonly used in
4065 the last block of a range test. */
4067 static bool
4068 final_range_test_p (gimple *stmt)
4070 basic_block bb, rhs_bb, lhs_bb;
4071 edge e;
4072 tree lhs, rhs;
4073 use_operand_p use_p;
4074 gimple *use_stmt;
4076 if (!gimple_assign_cast_p (stmt)
4077 && (!is_gimple_assign (stmt)
4078 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4079 != tcc_comparison)))
4080 return false;
4081 bb = gimple_bb (stmt);
4082 if (!single_succ_p (bb))
4083 return false;
4084 e = single_succ_edge (bb);
4085 if (e->flags & EDGE_COMPLEX)
4086 return false;
4088 lhs = gimple_assign_lhs (stmt);
4089 rhs = gimple_assign_rhs1 (stmt);
4090 if (gimple_assign_cast_p (stmt)
4091 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4092 || TREE_CODE (rhs) != SSA_NAME
4093 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4094 return false;
4096 if (!gimple_assign_cast_p (stmt)
4097 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4098 return false;
4100 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4101 if (!single_imm_use (lhs, &use_p, &use_stmt))
4102 return false;
4104 if (gimple_code (use_stmt) != GIMPLE_PHI
4105 || gimple_bb (use_stmt) != e->dest)
4106 return false;
4108 /* And that the rhs is defined in the same loop. */
4109 if (gimple_assign_cast_p (stmt))
4111 if (TREE_CODE (rhs) != SSA_NAME
4112 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4113 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4114 return false;
4116 else
4118 if (TREE_CODE (lhs) != SSA_NAME
4119 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4120 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4121 return false;
4124 return true;
4127 /* Return true if BB is suitable basic block for inter-bb range test
4128 optimization. If BACKWARD is true, BB should be the only predecessor
4129 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4130 or compared against to find a common basic block to which all conditions
4131 branch if true resp. false. If BACKWARD is false, TEST_BB should
4132 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4133 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4134 block points to an empty block that falls through into *OTHER_BB and
4135 the phi args match that path. */
4137 static bool
4138 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4139 bool *test_swapped_p, bool backward)
4141 edge_iterator ei, ei2;
4142 edge e, e2;
4143 gimple *stmt;
4144 gphi_iterator gsi;
4145 bool other_edge_seen = false;
4146 bool is_cond;
4148 if (test_bb == bb)
4149 return false;
4150 /* Check last stmt first. */
4151 stmt = last_stmt (bb);
4152 if (stmt == NULL
4153 || (gimple_code (stmt) != GIMPLE_COND
4154 && (backward || !final_range_test_p (stmt)))
4155 || gimple_visited_p (stmt)
4156 || stmt_could_throw_p (cfun, stmt)
4157 || *other_bb == bb)
4158 return false;
4159 is_cond = gimple_code (stmt) == GIMPLE_COND;
4160 if (is_cond)
4162 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4163 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4164 to *OTHER_BB (if not set yet, try to find it out). */
4165 if (EDGE_COUNT (bb->succs) != 2)
4166 return false;
4167 FOR_EACH_EDGE (e, ei, bb->succs)
4169 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4170 return false;
4171 if (e->dest == test_bb)
4173 if (backward)
4174 continue;
4175 else
4176 return false;
4178 if (e->dest == bb)
4179 return false;
4180 if (*other_bb == NULL)
4182 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4183 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4184 return false;
4185 else if (e->dest == e2->dest)
4186 *other_bb = e->dest;
4187 if (*other_bb == NULL)
4188 return false;
4190 if (e->dest == *other_bb)
4191 other_edge_seen = true;
4192 else if (backward)
4193 return false;
4195 if (*other_bb == NULL || !other_edge_seen)
4196 return false;
4198 else if (single_succ (bb) != *other_bb)
4199 return false;
4201 /* Now check all PHIs of *OTHER_BB. */
4202 e = find_edge (bb, *other_bb);
4203 e2 = find_edge (test_bb, *other_bb);
4204 retry:;
4205 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4207 gphi *phi = gsi.phi ();
4208 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4209 corresponding to BB and TEST_BB predecessor must be the same. */
4210 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4211 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4213 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4214 one of the PHIs should have the lhs of the last stmt in
4215 that block as PHI arg and that PHI should have 0 or 1
4216 corresponding to it in all other range test basic blocks
4217 considered. */
4218 if (!is_cond)
4220 if (gimple_phi_arg_def (phi, e->dest_idx)
4221 == gimple_assign_lhs (stmt)
4222 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4223 || integer_onep (gimple_phi_arg_def (phi,
4224 e2->dest_idx))))
4225 continue;
4227 else
4229 gimple *test_last = last_stmt (test_bb);
4230 if (gimple_code (test_last) == GIMPLE_COND)
4232 if (backward ? e2->src != test_bb : e->src != bb)
4233 return false;
4235 /* For last_bb, handle also:
4236 if (x_3(D) == 3)
4237 goto <bb 6>; [34.00%]
4238 else
4239 goto <bb 7>; [66.00%]
4241 <bb 6> [local count: 79512730]:
4243 <bb 7> [local count: 1073741824]:
4244 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4245 where bb 7 is *OTHER_BB, but the PHI values from the
4246 earlier bbs match the path through the empty bb
4247 in between. */
4248 edge e3;
4249 if (backward)
4250 e3 = EDGE_SUCC (test_bb,
4251 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4252 else
4253 e3 = EDGE_SUCC (bb,
4254 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4255 if (empty_block_p (e3->dest)
4256 && single_succ_p (e3->dest)
4257 && single_succ (e3->dest) == *other_bb
4258 && single_pred_p (e3->dest)
4259 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4261 if (backward)
4262 e2 = single_succ_edge (e3->dest);
4263 else
4264 e = single_succ_edge (e3->dest);
4265 if (test_swapped_p)
4266 *test_swapped_p = true;
4267 goto retry;
4270 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4271 == gimple_assign_lhs (test_last)
4272 && (integer_zerop (gimple_phi_arg_def (phi,
4273 e->dest_idx))
4274 || integer_onep (gimple_phi_arg_def (phi,
4275 e->dest_idx))))
4276 continue;
4279 return false;
4282 return true;
4285 /* Return true if BB doesn't have side-effects that would disallow
4286 range test optimization, all SSA_NAMEs set in the bb are consumed
4287 in the bb and there are no PHIs. */
4289 static bool
4290 no_side_effect_bb (basic_block bb)
4292 gimple_stmt_iterator gsi;
4293 gimple *last;
4295 if (!gimple_seq_empty_p (phi_nodes (bb)))
4296 return false;
4297 last = last_stmt (bb);
4298 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4300 gimple *stmt = gsi_stmt (gsi);
4301 tree lhs;
4302 imm_use_iterator imm_iter;
4303 use_operand_p use_p;
4305 if (is_gimple_debug (stmt))
4306 continue;
4307 if (gimple_has_side_effects (stmt))
4308 return false;
4309 if (stmt == last)
4310 return true;
4311 if (!is_gimple_assign (stmt))
4312 return false;
4313 lhs = gimple_assign_lhs (stmt);
4314 if (TREE_CODE (lhs) != SSA_NAME)
4315 return false;
4316 if (gimple_assign_rhs_could_trap_p (stmt))
4317 return false;
4318 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4320 gimple *use_stmt = USE_STMT (use_p);
4321 if (is_gimple_debug (use_stmt))
4322 continue;
4323 if (gimple_bb (use_stmt) != bb)
4324 return false;
4327 return false;
4330 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4331 return true and fill in *OPS recursively. */
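/* An illustrative sketch (assumed SSA names): for
     t_1 = a_2 | b_3;
     t_4 = t_1 | c_5;
   get_ops (t_4, BIT_IOR_EXPR, ...) pushes entries for a_2, b_3 and
   c_5, provided each intermediate name has a single use, and marks
   the defining statements visited.  */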
4333 static bool
4334 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4335 class loop *loop)
4337 gimple *stmt = SSA_NAME_DEF_STMT (var);
4338 tree rhs[2];
4339 int i;
4341 if (!is_reassociable_op (stmt, code, loop))
4342 return false;
4344 rhs[0] = gimple_assign_rhs1 (stmt);
4345 rhs[1] = gimple_assign_rhs2 (stmt);
4346 gimple_set_visited (stmt, true);
4347 for (i = 0; i < 2; i++)
4348 if (TREE_CODE (rhs[i]) == SSA_NAME
4349 && !get_ops (rhs[i], code, ops, loop)
4350 && has_single_use (rhs[i]))
4352 operand_entry *oe = operand_entry_pool.allocate ();
4354 oe->op = rhs[i];
4355 oe->rank = code;
4356 oe->id = 0;
4357 oe->count = 1;
4358 oe->stmt_to_insert = NULL;
4359 ops->safe_push (oe);
4361 return true;
4364 /* Find the ops that were added by get_ops starting from VAR, see if
4365 they were changed during update_range_test and if yes, create new
4366 stmts. */
4368 static tree
4369 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4370 unsigned int *pidx, class loop *loop)
4372 gimple *stmt = SSA_NAME_DEF_STMT (var);
4373 tree rhs[4];
4374 int i;
4376 if (!is_reassociable_op (stmt, code, loop))
4377 return NULL;
4379 rhs[0] = gimple_assign_rhs1 (stmt);
4380 rhs[1] = gimple_assign_rhs2 (stmt);
4381 rhs[2] = rhs[0];
4382 rhs[3] = rhs[1];
4383 for (i = 0; i < 2; i++)
4384 if (TREE_CODE (rhs[i]) == SSA_NAME)
4386 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4387 if (rhs[2 + i] == NULL_TREE)
4389 if (has_single_use (rhs[i]))
4390 rhs[2 + i] = ops[(*pidx)++]->op;
4391 else
4392 rhs[2 + i] = rhs[i];
4395 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4396 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4398 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4399 var = make_ssa_name (TREE_TYPE (var));
4400 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4401 rhs[2], rhs[3]);
4402 gimple_set_uid (g, gimple_uid (stmt));
4403 gimple_set_visited (g, true);
4404 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4406 return var;
4409 /* Structure to track the initial value passed to get_ops and
4410 the range in the ops vector for each basic block. */
4412 struct inter_bb_range_test_entry
4414 tree op;
4415 unsigned int first_idx, last_idx;
4418 /* Inter-bb range test optimization.
4420 Returns TRUE if a gimple conditional is optimized to a constant
4421 true/false; otherwise returns FALSE.
4423 This indicates to the caller that it should run a CFG cleanup pass
4424 once reassociation is completed. */
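/* An illustrative sketch of the CFG shape this looks for
   (simplified, assumed SSA names):
     <bb 2>: if (a_1 == 3) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 == 5) goto <bb 4>; else goto <bb 5>;
   where <bb 4> is other_bb.  The two tests can then be merged by
   optimize_range_tests into a single condition in the first block,
   while the now-redundant GIMPLE_COND is turned into a constant
   true/false condition, to be removed by a later CFG cleanup.  */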
4426 static bool
4427 maybe_optimize_range_tests (gimple *stmt)
4429 basic_block first_bb = gimple_bb (stmt);
4430 basic_block last_bb = first_bb;
4431 basic_block other_bb = NULL;
4432 basic_block bb;
4433 edge_iterator ei;
4434 edge e;
4435 auto_vec<operand_entry *> ops;
4436 auto_vec<inter_bb_range_test_entry> bbinfo;
4437 bool any_changes = false;
4438 bool cfg_cleanup_needed = false;
4440 /* Consider only basic blocks that end with GIMPLE_COND or
4441 a cast statement satisfying final_range_test_p. All
4442 but the last bb in the first_bb .. last_bb range
4443 should end with GIMPLE_COND. */
4444 if (gimple_code (stmt) == GIMPLE_COND)
4446 if (EDGE_COUNT (first_bb->succs) != 2)
4447 return cfg_cleanup_needed;
4449 else if (final_range_test_p (stmt))
4450 other_bb = single_succ (first_bb);
4451 else
4452 return cfg_cleanup_needed;
4454 if (stmt_could_throw_p (cfun, stmt))
4455 return cfg_cleanup_needed;
4457 /* As relative ordering of post-dominator sons isn't fixed,
4458 maybe_optimize_range_tests can be called first on any
4459 bb in the range we want to optimize. So, start searching
4460 backwards, checking whether first_bb can be set to a predecessor.
4461 while (single_pred_p (first_bb))
4463 basic_block pred_bb = single_pred (first_bb);
4464 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4465 break;
4466 if (!no_side_effect_bb (first_bb))
4467 break;
4468 first_bb = pred_bb;
4470 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4471 Before starting forward search in last_bb successors, find
4472 out the other_bb. */
4473 if (first_bb == last_bb)
4475 other_bb = NULL;
4476 /* As non-GIMPLE_COND last stmt always terminates the range,
4477 if forward search didn't discover anything, just give up. */
4478 if (gimple_code (stmt) != GIMPLE_COND)
4479 return cfg_cleanup_needed;
4480 /* Look at both successors. Either it ends with a GIMPLE_COND
4481 and satisfies suitable_cond_bb, or ends with a cast and
4482 other_bb is that cast's successor. */
4483 FOR_EACH_EDGE (e, ei, first_bb->succs)
4484 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4485 || e->dest == first_bb)
4486 return cfg_cleanup_needed;
4487 else if (single_pred_p (e->dest))
4489 stmt = last_stmt (e->dest);
4490 if (stmt
4491 && gimple_code (stmt) == GIMPLE_COND
4492 && EDGE_COUNT (e->dest->succs) == 2)
4494 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4495 NULL, true))
4496 break;
4497 else
4498 other_bb = NULL;
4500 else if (stmt
4501 && final_range_test_p (stmt)
4502 && find_edge (first_bb, single_succ (e->dest)))
4504 other_bb = single_succ (e->dest);
4505 if (other_bb == first_bb)
4506 other_bb = NULL;
4509 if (other_bb == NULL)
4510 return cfg_cleanup_needed;
4512 /* Now do the forward search, moving last_bb to successor bbs
4513 that aren't other_bb. */
4514 while (EDGE_COUNT (last_bb->succs) == 2)
4516 FOR_EACH_EDGE (e, ei, last_bb->succs)
4517 if (e->dest != other_bb)
4518 break;
4519 if (e == NULL)
4520 break;
4521 if (!single_pred_p (e->dest))
4522 break;
4523 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4524 break;
4525 if (!no_side_effect_bb (e->dest))
4526 break;
4527 last_bb = e->dest;
4529 if (first_bb == last_bb)
4530 return cfg_cleanup_needed;
4531 /* Here basic blocks first_bb through last_bb's predecessor
4532 end with GIMPLE_COND, all of them have one of the edges to
4533 other_bb and another to another block in the range,
4534 all blocks except first_bb don't have side-effects and
4535 last_bb ends with either GIMPLE_COND, or cast satisfying
4536 final_range_test_p. */
4537 for (bb = last_bb; ; bb = single_pred (bb))
4539 enum tree_code code;
4540 tree lhs, rhs;
4541 inter_bb_range_test_entry bb_ent;
4543 bb_ent.op = NULL_TREE;
4544 bb_ent.first_idx = ops.length ();
4545 bb_ent.last_idx = bb_ent.first_idx;
4546 e = find_edge (bb, other_bb);
4547 stmt = last_stmt (bb);
4548 gimple_set_visited (stmt, true);
4549 if (gimple_code (stmt) != GIMPLE_COND)
4551 use_operand_p use_p;
4552 gimple *phi;
4553 edge e2;
4554 unsigned int d;
4556 lhs = gimple_assign_lhs (stmt);
4557 rhs = gimple_assign_rhs1 (stmt);
4558 gcc_assert (bb == last_bb);
4560 /* stmt is
4561 _123 = (int) _234;
4563 _234 = a_2(D) == 2;
4565 followed by:
4566 <bb M>:
4567 # _345 = PHI <_123(N), 1(...), 1(...)>
4569 or 0 instead of 1. If it is 0, the _234
4570 range test is anded together with all the
4571 other range tests, if it is 1, it is ored with
4572 them. */
4573 single_imm_use (lhs, &use_p, &phi);
4574 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4575 e2 = find_edge (first_bb, other_bb);
4576 d = e2->dest_idx;
4577 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4578 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4579 code = BIT_AND_EXPR;
4580 else
4582 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4583 code = BIT_IOR_EXPR;
4586 /* If _234 SSA_NAME_DEF_STMT is
4587 _234 = _567 | _789;
4588 (or &, corresponding to 1/0 in the phi arguments),
4589 push into ops the individual range test arguments
4590 of the bitwise or resp. and, recursively. */
4591 if (TREE_CODE (rhs) == SSA_NAME
4592 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4593 != tcc_comparison)
4594 && !get_ops (rhs, code, &ops,
4595 loop_containing_stmt (stmt))
4596 && has_single_use (rhs))
4598 /* Otherwise, push the _234 range test itself. */
4599 operand_entry *oe = operand_entry_pool.allocate ();
4601 oe->op = rhs;
4602 oe->rank = code;
4603 oe->id = 0;
4604 oe->count = 1;
4605 oe->stmt_to_insert = NULL;
4606 ops.safe_push (oe);
4607 bb_ent.last_idx++;
4608 bb_ent.op = rhs;
4610 else if (is_gimple_assign (stmt)
4611 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4612 == tcc_comparison)
4613 && !get_ops (lhs, code, &ops,
4614 loop_containing_stmt (stmt))
4615 && has_single_use (lhs))
4617 operand_entry *oe = operand_entry_pool.allocate ();
4618 oe->op = lhs;
4619 oe->rank = code;
4620 oe->id = 0;
4621 oe->count = 1;
4622 ops.safe_push (oe);
4623 bb_ent.last_idx++;
4624 bb_ent.op = lhs;
4626 else
4628 bb_ent.last_idx = ops.length ();
4629 bb_ent.op = rhs;
4631 bbinfo.safe_push (bb_ent);
4632 continue;
4634 else if (bb == last_bb)
4636 /* For last_bb, handle also:
4637 if (x_3(D) == 3)
4638 goto <bb 6>; [34.00%]
4639 else
4640 goto <bb 7>; [66.00%]
4642 <bb 6> [local count: 79512730]:
4644 <bb 7> [local count: 1073741824]:
4645 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4646 where bb 7 is OTHER_BB, but the PHI values from the
4647 earlier bbs match the path through the empty bb
4648 in between. */
4649 bool test_swapped_p = false;
4650 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4651 &other_bb, &test_swapped_p, true);
4652 gcc_assert (ok);
4653 if (test_swapped_p)
4654 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4656 /* Otherwise stmt is GIMPLE_COND. */
4657 code = gimple_cond_code (stmt);
4658 lhs = gimple_cond_lhs (stmt);
4659 rhs = gimple_cond_rhs (stmt);
4660 if (TREE_CODE (lhs) == SSA_NAME
4661 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4662 && ((code != EQ_EXPR && code != NE_EXPR)
4663 || rhs != boolean_false_node
4664 /* Either push into ops the individual bitwise
4665 or resp. and operands, depending on which
4666 edge is other_bb. */
4667 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4668 ^ (code == EQ_EXPR))
4669 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4670 loop_containing_stmt (stmt))))
4672 /* Or push the GIMPLE_COND stmt itself. */
4673 operand_entry *oe = operand_entry_pool.allocate ();
4675 oe->op = NULL;
4676 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4677 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4678 /* oe->op = NULL signals that there is no SSA_NAME
4679 for the range test, and oe->id instead is the
4680 number of the basic block at whose end the
4681 GIMPLE_COND is. */
4682 oe->id = bb->index;
4683 oe->count = 1;
4684 oe->stmt_to_insert = NULL;
4685 ops.safe_push (oe);
4686 bb_ent.op = NULL;
4687 bb_ent.last_idx++;
4689 else if (ops.length () > bb_ent.first_idx)
4691 bb_ent.op = lhs;
4692 bb_ent.last_idx = ops.length ();
4694 bbinfo.safe_push (bb_ent);
4695 if (bb == first_bb)
4696 break;
4698 if (ops.length () > 1)
4699 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4700 if (any_changes)
4702 unsigned int idx, max_idx = 0;
4703 /* update_ops relies on has_single_use predicates returning the
4704 same values as they did during get_ops earlier. Additionally it
4705 never removes statements, only adds new ones and it should walk
4706 from the single imm use and check the predicate already before
4707 making those changes.
4708 On the other side, the handling of GIMPLE_COND directly can turn
4709 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4710 it needs to be done in a separate loop afterwards. */
4711 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4713 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4714 && bbinfo[idx].op != NULL_TREE)
4716 tree new_op;
4718 max_idx = idx;
4719 stmt = last_stmt (bb);
4720 new_op = update_ops (bbinfo[idx].op,
4721 (enum tree_code)
4722 ops[bbinfo[idx].first_idx]->rank,
4723 ops, &bbinfo[idx].first_idx,
4724 loop_containing_stmt (stmt));
4725 if (new_op == NULL_TREE)
4727 gcc_assert (bb == last_bb);
4728 new_op = ops[bbinfo[idx].first_idx++]->op;
4730 if (bbinfo[idx].op != new_op)
4732 imm_use_iterator iter;
4733 use_operand_p use_p;
4734 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4736 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4737 if (is_gimple_debug (use_stmt))
4738 continue;
4739 else if (gimple_code (use_stmt) == GIMPLE_COND
4740 || gimple_code (use_stmt) == GIMPLE_PHI)
4741 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4742 SET_USE (use_p, new_op);
4743 else if ((is_gimple_assign (use_stmt)
4744 && (TREE_CODE_CLASS
4745 (gimple_assign_rhs_code (use_stmt))
4746 == tcc_comparison)))
4747 cast_or_tcc_cmp_stmt = use_stmt;
4748 else if (gimple_assign_cast_p (use_stmt))
4749 cast_or_tcc_cmp_stmt = use_stmt;
4750 else
4751 gcc_unreachable ();
4753 if (cast_or_tcc_cmp_stmt)
4755 gcc_assert (bb == last_bb);
4756 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4757 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4758 enum tree_code rhs_code
4759 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4760 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4761 : CONVERT_EXPR;
4762 gassign *g;
4763 if (is_gimple_min_invariant (new_op))
4765 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4766 g = gimple_build_assign (new_lhs, new_op);
4768 else
4769 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4770 gimple_stmt_iterator gsi
4771 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4772 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4773 gimple_set_visited (g, true);
4774 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4775 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4776 if (is_gimple_debug (use_stmt))
4777 continue;
4778 else if (gimple_code (use_stmt) == GIMPLE_COND
4779 || gimple_code (use_stmt) == GIMPLE_PHI)
4780 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4781 SET_USE (use_p, new_lhs);
4782 else
4783 gcc_unreachable ();
4787 if (bb == first_bb)
4788 break;
4790 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4792 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4793 && bbinfo[idx].op == NULL_TREE
4794 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4796 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4798 if (idx > max_idx)
4799 max_idx = idx;
4801 /* If we collapse the conditional to a true/false
4802 condition, then bubble that knowledge up to our caller. */
4803 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4805 gimple_cond_make_false (cond_stmt);
4806 cfg_cleanup_needed = true;
4808 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4810 gimple_cond_make_true (cond_stmt);
4811 cfg_cleanup_needed = true;
4813 else
4815 gimple_cond_set_code (cond_stmt, NE_EXPR);
4816 gimple_cond_set_lhs (cond_stmt,
4817 ops[bbinfo[idx].first_idx]->op);
4818 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4820 update_stmt (cond_stmt);
4822 if (bb == first_bb)
4823 break;
4826 /* The above changes could result in basic blocks after the first
4827 modified one, up to and including last_bb, being executed even if
4828 they would not be in the original program. If the value ranges of
4829 assignment lhs' in those bbs were dependent on the conditions
4830 guarding those basic blocks which now can change, the VRs might
4831 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4832 are only used within the same bb, it should not be a big deal if
4833 we just reset all the VRs in those bbs. See PR68671. */
4834 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4835 reset_flow_sensitive_info_in_bb (bb);
4837 return cfg_cleanup_needed;
4840 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4841 of STMT in its operands. This is also known as a "destructive
4842 update" operation. */
4844 static bool
4845 is_phi_for_stmt (gimple *stmt, tree operand)
4847 gimple *def_stmt;
4848 gphi *def_phi;
4849 tree lhs;
4850 use_operand_p arg_p;
4851 ssa_op_iter i;
4853 if (TREE_CODE (operand) != SSA_NAME)
4854 return false;
4856 lhs = gimple_assign_lhs (stmt);
4858 def_stmt = SSA_NAME_DEF_STMT (operand);
4859 def_phi = dyn_cast <gphi *> (def_stmt);
4860 if (!def_phi)
4861 return false;
4863 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4864 if (lhs == USE_FROM_PTR (arg_p))
4865 return true;
4866 return false;
4869 /* Remove def stmt of VAR if VAR has zero uses and recurse
4870 on rhs1 operand if so. */
4872 static void
4873 remove_visited_stmt_chain (tree var)
4875 gimple *stmt;
4876 gimple_stmt_iterator gsi;
4878 while (1)
4880 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4881 return;
4882 stmt = SSA_NAME_DEF_STMT (var);
4883 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4885 var = gimple_assign_rhs1 (stmt);
4886 gsi = gsi_for_stmt (stmt);
4887 reassoc_remove_stmt (&gsi);
4888 release_defs (stmt);
4890 else
4891 return;
4895 /* This function checks three consecutive operands in
4896 the passed operand vector OPS starting from OPINDEX and
4897 swaps two operands if it is profitable for the binary operation
4898 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4900 We pair ops with the same rank if possible.
4902 The alternative we try is to see if STMT is a destructive
4903 update style statement, which is like:
4904 b = phi (a, ...)
4905 a = c + b;
4906 In that case, we want to use the destructive update form to
4907 expose the possible vectorizer sum reduction opportunity.
4908 In that case, the third operand will be the phi node. This
4909 check is not performed if STMT is null.
4911 We could, of course, try to be better as noted above, and do a
4912 lot of work to try to find these opportunities in >3 operand
4913 cases, but it is unlikely to be worth it. */
4915 static void
4916 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4917 unsigned int opindex, gimple *stmt)
4919 operand_entry *oe1, *oe2, *oe3;
4921 oe1 = ops[opindex];
4922 oe2 = ops[opindex + 1];
4923 oe3 = ops[opindex + 2];
4925 if ((oe1->rank == oe2->rank
4926 && oe2->rank != oe3->rank)
4927 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4928 && !is_phi_for_stmt (stmt, oe1->op)
4929 && !is_phi_for_stmt (stmt, oe2->op)))
4930 std::swap (*oe1, *oe3);
4931 else if ((oe1->rank == oe3->rank
4932 && oe2->rank != oe3->rank)
4933 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4934 && !is_phi_for_stmt (stmt, oe1->op)
4935 && !is_phi_for_stmt (stmt, oe3->op)))
4936 std::swap (*oe1, *oe2);
4939 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4940 two definitions, otherwise return STMT. */
4942 static inline gimple *
4943 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4945 if (TREE_CODE (rhs1) == SSA_NAME
4946 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4947 stmt = SSA_NAME_DEF_STMT (rhs1);
4948 if (TREE_CODE (rhs2) == SSA_NAME
4949 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4950 stmt = SSA_NAME_DEF_STMT (rhs2);
4951 return stmt;
4954 /* If the stmt that defines operand has to be inserted, insert it
4955 before the use. */
4956 static void
4957 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4959 gcc_assert (is_gimple_assign (stmt_to_insert));
4960 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4961 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4962 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4963 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4964 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4966 /* If the insert point is not stmt, then insert_point would be
4967 the point where operand rhs1 or rhs2 is defined. In this case,
4968 stmt_to_insert has to be inserted afterwards. This would
4969 only happen when the stmt insertion point is flexible. */
4970 if (stmt == insert_point)
4971 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4972 else
4973 insert_stmt_after (stmt_to_insert, insert_point);
4977 /* Recursively rewrite our linearized statements so that the operators
4978 match those in OPS[OPINDEX], putting the computation in rank
4979 order. Return new lhs.
4980 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4981 the current stmt and during recursive invocations.
4982 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4983 recursive invocations. */
4985 static tree
4986 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
4987 vec<operand_entry *> ops, bool changed, bool next_changed)
4989 tree rhs1 = gimple_assign_rhs1 (stmt);
4990 tree rhs2 = gimple_assign_rhs2 (stmt);
4991 tree lhs = gimple_assign_lhs (stmt);
4992 operand_entry *oe;
4994 /* The final recursion case for this function is that you have
4995 exactly two operations left.
4996 If we had exactly one op in the entire list to start with, we
4997 would have never called this function, and the tail recursion
4998 rewrites them one at a time. */
4999 if (opindex + 2 == ops.length ())
5001 operand_entry *oe1, *oe2;
5003 oe1 = ops[opindex];
5004 oe2 = ops[opindex + 1];
5006 if (rhs1 != oe1->op || rhs2 != oe2->op)
5008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5009 unsigned int uid = gimple_uid (stmt);
5011 if (dump_file && (dump_flags & TDF_DETAILS))
5013 fprintf (dump_file, "Transforming ");
5014 print_gimple_stmt (dump_file, stmt, 0);
5017 /* If the stmt that defines operand has to be inserted, insert it
5018 before the use. */
5019 if (oe1->stmt_to_insert)
5020 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5021 if (oe2->stmt_to_insert)
5022 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5023 /* Even when changed is false, reassociation could have e.g. removed
5024 some redundant operations, so unless we are just swapping the
5025 arguments or unless there is no change at all (then we just
5026 return lhs), force creation of a new SSA_NAME. */
5027 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5029 gimple *insert_point
5030 = find_insert_point (stmt, oe1->op, oe2->op);
5031 lhs = make_ssa_name (TREE_TYPE (lhs));
5032 stmt
5033 = gimple_build_assign (lhs, rhs_code,
5034 oe1->op, oe2->op);
5035 gimple_set_uid (stmt, uid);
5036 gimple_set_visited (stmt, true);
5037 if (insert_point == gsi_stmt (gsi))
5038 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5039 else
5040 insert_stmt_after (stmt, insert_point);
5042 else
5044 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5045 == stmt);
5046 gimple_assign_set_rhs1 (stmt, oe1->op);
5047 gimple_assign_set_rhs2 (stmt, oe2->op);
5048 update_stmt (stmt);
5051 if (rhs1 != oe1->op && rhs1 != oe2->op)
5052 remove_visited_stmt_chain (rhs1);
5054 if (dump_file && (dump_flags & TDF_DETAILS))
5056 fprintf (dump_file, " into ");
5057 print_gimple_stmt (dump_file, stmt, 0);
5060 return lhs;
5063 /* If we hit here, we should have 3 or more ops left. */
5064 gcc_assert (opindex + 2 < ops.length ());
5066 /* Rewrite the next operator. */
5067 oe = ops[opindex];
5069 /* If the stmt that defines operand has to be inserted, insert it
5070 before the use. */
5071 if (oe->stmt_to_insert)
5072 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5074 /* Recurse on the LHS of the binary operator, which is guaranteed to
5075 be the non-leaf side. */
5076 tree new_rhs1
5077 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5078 changed || oe->op != rhs2 || next_changed,
5079 false);
5081 if (oe->op != rhs2 || new_rhs1 != rhs1)
5083 if (dump_file && (dump_flags & TDF_DETAILS))
5085 fprintf (dump_file, "Transforming ");
5086 print_gimple_stmt (dump_file, stmt, 0);
5089 /* If changed is false, this is either opindex == 0
5090 or all outer rhs2's were equal to corresponding oe->op,
5091 and powi_result is NULL.
5092 That means lhs is equivalent before and after reassociation.
5093 Otherwise ensure the old lhs SSA_NAME is not reused and
5094 create a new stmt as well, so that any debug stmts will be
5095 properly adjusted. */
5096 if (changed)
5098 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5099 unsigned int uid = gimple_uid (stmt);
5100 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5102 lhs = make_ssa_name (TREE_TYPE (lhs));
5103 stmt = gimple_build_assign (lhs, rhs_code,
5104 new_rhs1, oe->op);
5105 gimple_set_uid (stmt, uid);
5106 gimple_set_visited (stmt, true);
5107 if (insert_point == gsi_stmt (gsi))
5108 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5109 else
5110 insert_stmt_after (stmt, insert_point);
5112 else
5114 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5115 == stmt);
5116 gimple_assign_set_rhs1 (stmt, new_rhs1);
5117 gimple_assign_set_rhs2 (stmt, oe->op);
5118 update_stmt (stmt);
5121 if (dump_file && (dump_flags & TDF_DETAILS))
5123 fprintf (dump_file, " into ");
5124 print_gimple_stmt (dump_file, stmt, 0);
5127 return lhs;
5130 /* Find out how many cycles we need to compute a statement chain.
5131 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5132 maximum number of independent statements we may execute per cycle. */
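/* An illustrative worked example of the model below: for
   ops_num == 8 and cpu_width == 2, the first phase takes
   8 / (2 * 2) == 2 cycles, each retiring cpu_width operands and
   leaving 8 - 2 * 2 == 4 of them; the remainder is then halved per
   cycle, exact_log2 (4) == 2 more cycles, for a total of 4.  */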
5134 static int
5135 get_required_cycles (int ops_num, int cpu_width)
5137 int res;
5138 int elog;
5139 unsigned int rest;
5141 /* While we have more than 2 * cpu_width operands
5142 we may reduce the number of operands by cpu_width
5143 per cycle. */
5144 res = ops_num / (2 * cpu_width);
5146 /* The remaining operand count may be reduced twice per cycle
5147 until we have only one operand. */
5148 rest = (unsigned)(ops_num - res * cpu_width);
5149 elog = exact_log2 (rest);
5150 if (elog >= 0)
5151 res += elog;
5152 else
5153 res += floor_log2 (rest) + 1;
5155 return res;
5158 /* Returns an optimal number of registers to use for computation of
5159 the given statements. */
5161 static int
5162 get_reassociation_width (int ops_num, enum tree_code opc,
5163 machine_mode mode)
5165 int param_width = param_tree_reassoc_width;
5166 int width;
5167 int width_min;
5168 int cycles_best;
5170 if (param_width > 0)
5171 width = param_width;
5172 else
5173 width = targetm.sched.reassociation_width (opc, mode);
5175 if (width == 1)
5176 return width;
5178 /* Get the minimal time required for sequence computation. */
5179 cycles_best = get_required_cycles (ops_num, width);
5181 /* Check whether we may use a smaller width and still compute the
5182 sequence in the same number of cycles. That will allow us to reduce register usage.
5183 get_required_cycles is monotonically increasing with lower width
5184 so we can perform a binary search for the minimal width that still
5185 results in the optimal cycle count. */
5186 width_min = 1;
5187 while (width > width_min)
5189 int width_mid = (width + width_min) / 2;
5191 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5192 width = width_mid;
5193 else if (width_min < width_mid)
5194 width_min = width_mid;
5195 else
5196 break;
5199 return width;
5202 /* Recursively rewrite our linearized statements so that the operators
5203 match those in OPS[OPINDEX], putting the computation in rank
5204 order and trying to allow operations to be executed in
5205 parallel. */
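/* An illustrative sketch (assumed names): with OPS = {a, b, c, d}
   and WIDTH == 2, the serial chain (((a + b) + c) + d) is re-emitted
   roughly as
     t1 = a + b;  t2 = c + d;  lhs = t1 + t2;
   so that t1 and t2 can be computed in the same cycle.  */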
5207 static void
5208 rewrite_expr_tree_parallel (gassign *stmt, int width,
5209 vec<operand_entry *> ops)
5211 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5212 int op_num = ops.length ();
5213 gcc_assert (op_num > 0);
5214 int stmt_num = op_num - 1;
5215 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5216 int op_index = op_num - 1;
5217 int stmt_index = 0;
5218 int ready_stmts_end = 0;
5219 int i = 0;
5220 gimple *stmt1 = NULL, *stmt2 = NULL;
5221 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5223 /* We start expression rewriting from the top statements.
5224 So, in this loop we create a full list of statements
5225 we will work with. */
5226 stmts[stmt_num - 1] = stmt;
5227 for (i = stmt_num - 2; i >= 0; i--)
5228 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5230 for (i = 0; i < stmt_num; i++)
5232 tree op1, op2;
5234 /* Determine whether we should use results of
5235 already handled statements or not. */
5236 if (ready_stmts_end == 0
5237 && (i - stmt_index >= width || op_index < 1))
5238 ready_stmts_end = i;
5240 /* Now we choose operands for the next statement. A nonzero
5241 value in ready_stmts_end means that we should use
5242 the result of already generated statements as the new operand. */
5243 if (ready_stmts_end > 0)
5245 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5246 if (ready_stmts_end > stmt_index)
5247 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5248 else if (op_index >= 0)
5250 operand_entry *oe = ops[op_index--];
5251 stmt2 = oe->stmt_to_insert;
5252 op2 = oe->op;
5254 else
5256 gcc_assert (stmt_index < i);
5257 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5260 if (stmt_index >= ready_stmts_end)
5261 ready_stmts_end = 0;
5263 else
5265 if (op_index > 1)
5266 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5267 operand_entry *oe2 = ops[op_index--];
5268 operand_entry *oe1 = ops[op_index--];
5269 op2 = oe2->op;
5270 stmt2 = oe2->stmt_to_insert;
5271 op1 = oe1->op;
5272 stmt1 = oe1->stmt_to_insert;
5275 /* If we emit the last statement then we should put
5276 operands into the last statement. It will also
5277 break the loop. */
5278 if (op_index < 0 && stmt_index == i)
5279 i = stmt_num - 1;
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5283 fprintf (dump_file, "Transforming ");
5284 print_gimple_stmt (dump_file, stmts[i], 0);
5287 /* If the stmt that defines operand has to be inserted, insert it
5288 before the use. */
5289 if (stmt1)
5290 insert_stmt_before_use (stmts[i], stmt1);
5291 if (stmt2)
5292 insert_stmt_before_use (stmts[i], stmt2);
5293 stmt1 = stmt2 = NULL;
5295 /* We keep original statement only for the last one. All
5296 others are recreated. */
5297 if (i == stmt_num - 1)
5299 gimple_assign_set_rhs1 (stmts[i], op1);
5300 gimple_assign_set_rhs2 (stmts[i], op2);
5301 update_stmt (stmts[i]);
5303 else
5305 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5306 gimple_set_visited (stmts[i], true);
5308 if (dump_file && (dump_flags & TDF_DETAILS))
5310 fprintf (dump_file, " into ");
5311 print_gimple_stmt (dump_file, stmts[i], 0);
5315 remove_visited_stmt_chain (last_rhs1);
5318 /* Transform STMT, which is really (A + B) + (C + D), into the left
5319 linear form, ((A + B) + C) + D.
5320 Recurse on D if necessary. */
5322 static void
5323 linearize_expr (gimple *stmt)
5325 gimple_stmt_iterator gsi;
5326 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5327 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5328 gimple *oldbinrhs = binrhs;
5329 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5330 gimple *newbinrhs = NULL;
5331 class loop *loop = loop_containing_stmt (stmt);
5332 tree lhs = gimple_assign_lhs (stmt);
5334 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5335 && is_reassociable_op (binrhs, rhscode, loop));
5337 gsi = gsi_for_stmt (stmt);
5339 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5340 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5341 gimple_assign_rhs_code (binrhs),
5342 gimple_assign_lhs (binlhs),
5343 gimple_assign_rhs2 (binrhs));
5344 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5345 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5346 gimple_set_uid (binrhs, gimple_uid (stmt));
5348 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5349 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5351 if (dump_file && (dump_flags & TDF_DETAILS))
5353 fprintf (dump_file, "Linearized: ");
5354 print_gimple_stmt (dump_file, stmt, 0);
5357 reassociate_stats.linearized++;
5358 update_stmt (stmt);
5360 gsi = gsi_for_stmt (oldbinrhs);
5361 reassoc_remove_stmt (&gsi);
5362 release_defs (oldbinrhs);
5364 gimple_set_visited (stmt, true);
5365 gimple_set_visited (binlhs, true);
5366 gimple_set_visited (binrhs, true);
5368 /* Tail recurse on the new rhs if it still needs reassociation. */
5369 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5370 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5371 want to change the algorithm while converting to tuples. */
5372 linearize_expr (stmt);
5375 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5376 it. Otherwise, return NULL. */
5378 static gimple *
5379 get_single_immediate_use (tree lhs)
5381 use_operand_p immuse;
5382 gimple *immusestmt;
5384 if (TREE_CODE (lhs) == SSA_NAME
5385 && single_imm_use (lhs, &immuse, &immusestmt)
5386 && is_gimple_assign (immusestmt))
5387 return immusestmt;
5389 return NULL;
5392 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5393 representing the negated value. Insertions of any necessary
5394 instructions go before GSI.
5395 This function is recursive in that, if you hand it "a_5" as the
5396 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5397 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5399 static tree
5400 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5402 gimple *negatedefstmt = NULL;
5403 tree resultofnegate;
5404 gimple_stmt_iterator gsi;
5405 unsigned int uid;
5407 /* If we are trying to negate a name, defined by an add, negate the
5408 add operands instead. */
5409 if (TREE_CODE (tonegate) == SSA_NAME)
5410 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5411 if (TREE_CODE (tonegate) == SSA_NAME
5412 && is_gimple_assign (negatedefstmt)
5413 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5414 && has_single_use (gimple_assign_lhs (negatedefstmt))
5415 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5417 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5418 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5419 tree lhs = gimple_assign_lhs (negatedefstmt);
5420 gimple *g;
5422 gsi = gsi_for_stmt (negatedefstmt);
5423 rhs1 = negate_value (rhs1, &gsi);
5425 gsi = gsi_for_stmt (negatedefstmt);
5426 rhs2 = negate_value (rhs2, &gsi);
5428 gsi = gsi_for_stmt (negatedefstmt);
5429 lhs = make_ssa_name (TREE_TYPE (lhs));
5430 gimple_set_visited (negatedefstmt, true);
5431 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5432 gimple_set_uid (g, gimple_uid (negatedefstmt));
5433 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5434 return lhs;
5437 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5438 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5439 NULL_TREE, true, GSI_SAME_STMT);
5440 gsi = *gsip;
5441 uid = gimple_uid (gsi_stmt (gsi));
5442 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5444 gimple *stmt = gsi_stmt (gsi);
5445 if (gimple_uid (stmt) != 0)
5446 break;
5447 gimple_set_uid (stmt, uid);
5449 return resultofnegate;
5452 /* Return true if we should break up the subtract in STMT into an add
5453 with negate. This is true when the subtract operands are really
5454 adds, or the subtract itself is used in an add expression. In
5455 either case, breaking up the subtract into an add with negate
5456 exposes the adds to reassociation. */
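/* An illustrative example (assumed SSA names): for
     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;
   the subtract feeds an addition, so it is broken up below into
   c_1 = a_2 + (-b_3), exposing the whole chain to reassociation.  */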
5458 static bool
5459 should_break_up_subtract (gimple *stmt)
5461 tree lhs = gimple_assign_lhs (stmt);
5462 tree binlhs = gimple_assign_rhs1 (stmt);
5463 tree binrhs = gimple_assign_rhs2 (stmt);
5464 gimple *immusestmt;
5465 class loop *loop = loop_containing_stmt (stmt);
5467 if (TREE_CODE (binlhs) == SSA_NAME
5468 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5469 return true;
5471 if (TREE_CODE (binrhs) == SSA_NAME
5472 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5473 return true;
5475 if (TREE_CODE (lhs) == SSA_NAME
5476 && (immusestmt = get_single_immediate_use (lhs))
5477 && is_gimple_assign (immusestmt)
5478 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5479 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5480 && gimple_assign_rhs1 (immusestmt) == lhs)
5481 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5482 return true;
5483 return false;
5486 /* Transform STMT from A - B into A + -B. */
5488 static void
5489 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5491 tree rhs1 = gimple_assign_rhs1 (stmt);
5492 tree rhs2 = gimple_assign_rhs2 (stmt);
5494 if (dump_file && (dump_flags & TDF_DETAILS))
5496 fprintf (dump_file, "Breaking up subtract ");
5497 print_gimple_stmt (dump_file, stmt, 0);
5500 rhs2 = negate_value (rhs2, gsip);
5501 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5502 update_stmt (stmt);
5505 /* Determine whether STMT is a builtin call that raises an SSA name
5506 to an integer power and has only one use. If so, and this is early
5507 reassociation and unsafe math optimizations are permitted, place
5508 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5509 If any of these conditions does not hold, return FALSE. */
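/* An illustrative example: a call pow (x_1, 4.0) (assuming errno
   setting math is disabled) yields *BASE = x_1 and *EXPONENT = 4,
   whereas pow (x_1, 2.5) is rejected because the exponent is not an
   exact integer, and powi (x_1, 1) is rejected because expanding an
   exponent below 2 is not useful here.  */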
5511 static bool
5512 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5514 tree arg1;
5515 REAL_VALUE_TYPE c, cint;
5517 switch (gimple_call_combined_fn (stmt))
5519 CASE_CFN_POW:
5520 if (flag_errno_math)
5521 return false;
5523 *base = gimple_call_arg (stmt, 0);
5524 arg1 = gimple_call_arg (stmt, 1);
5526 if (TREE_CODE (arg1) != REAL_CST)
5527 return false;
5529 c = TREE_REAL_CST (arg1);
5531 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5532 return false;
5534 *exponent = real_to_integer (&c);
5535 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5536 if (!real_identical (&c, &cint))
5537 return false;
5539 break;
5541 CASE_CFN_POWI:
5542 *base = gimple_call_arg (stmt, 0);
5543 arg1 = gimple_call_arg (stmt, 1);
5545 if (!tree_fits_shwi_p (arg1))
5546 return false;
5548 *exponent = tree_to_shwi (arg1);
5549 break;
5551 default:
5552 return false;
5555 /* Expanding negative exponents is generally unproductive, so we don't
5556 complicate matters with those. Exponents of zero and one should
5557 have been handled by expression folding. */
5558 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5559 return false;
5561 return true;
5564 /* Try to derive and add operand entry for OP to *OPS. Return false if
5565 unsuccessful. */
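/* An illustrative sketch (assumed SSA names): within a MULT_EXPR
   chain, an operand defined as t_1 = -x_2 is added as the two
   entries x_2 and -1, and an operand defined as t_3 = pow (y_4, 3.0)
   becomes a repeated-factor entry for y_4 with exponent 3, when the
   conditions checked below allow it.  */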
5567 static bool
5568 try_special_add_to_ops (vec<operand_entry *> *ops,
5569 enum tree_code code,
5570 tree op, gimple* def_stmt)
5572 tree base = NULL_TREE;
5573 HOST_WIDE_INT exponent = 0;
5575 if (TREE_CODE (op) != SSA_NAME
5576 || ! has_single_use (op))
5577 return false;
5579 if (code == MULT_EXPR
5580 && reassoc_insert_powi_p
5581 && flag_unsafe_math_optimizations
5582 && is_gimple_call (def_stmt)
5583 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5585 add_repeat_to_ops_vec (ops, base, exponent);
5586 gimple_set_visited (def_stmt, true);
5587 return true;
5589 else if (code == MULT_EXPR
5590 && is_gimple_assign (def_stmt)
5591 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5592 && !HONOR_SNANS (TREE_TYPE (op))
5593 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5594 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5596 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5597 tree cst = build_minus_one_cst (TREE_TYPE (op));
5598 add_to_ops_vec (ops, rhs1);
5599 add_to_ops_vec (ops, cst);
5600 gimple_set_visited (def_stmt, true);
5601 return true;
5604 return false;
5607 /* Recursively linearize a binary expression that is the RHS of STMT.
5608 Place the operands of the expression tree in the vector named OPS. */
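/* An illustrative example (assumed shape): for a statement computing
   ((a + b) + (c + d)), the recursion below (after linearize_expr has
   put the tree into left-linear form) leaves OPS = {a, b, c, d}.  */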
5610 static void
5611 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5612 bool is_associative, bool set_visited)
5614 tree binlhs = gimple_assign_rhs1 (stmt);
5615 tree binrhs = gimple_assign_rhs2 (stmt);
5616 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5617 bool binlhsisreassoc = false;
5618 bool binrhsisreassoc = false;
5619 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5620 class loop *loop = loop_containing_stmt (stmt);
5622 if (set_visited)
5623 gimple_set_visited (stmt, true);
5625 if (TREE_CODE (binlhs) == SSA_NAME)
5627 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5628 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5629 && !stmt_could_throw_p (cfun, binlhsdef));
5632 if (TREE_CODE (binrhs) == SSA_NAME)
5634 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5635 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5636 && !stmt_could_throw_p (cfun, binrhsdef));
5639 /* If the LHS is not reassociable, but the RHS is, we need to swap
5640 them. If neither is reassociable, there is nothing we can do, so
5641 just put them in the ops vector. If the LHS is reassociable,
5642 linearize it. If both are reassociable, then linearize the RHS
5643 and the LHS. */
5645 if (!binlhsisreassoc)
5647 /* If this is not an associative operation, like division, give up. */
5648 if (!is_associative)
5650 add_to_ops_vec (ops, binrhs);
5651 return;
5654 if (!binrhsisreassoc)
5656 bool swap = false;
5657 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5658 /* If we add ops for the rhs we expect to be able to recurse
5659 to it via the lhs during expression rewrite so swap
5660 operands. */
5661 swap = true;
5662 else
5663 add_to_ops_vec (ops, binrhs);
5665 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5666 add_to_ops_vec (ops, binlhs);
5668 if (!swap)
5669 return;
5672 if (dump_file && (dump_flags & TDF_DETAILS))
5674 fprintf (dump_file, "swapping operands of ");
5675 print_gimple_stmt (dump_file, stmt, 0);
5678 swap_ssa_operands (stmt,
5679 gimple_assign_rhs1_ptr (stmt),
5680 gimple_assign_rhs2_ptr (stmt));
5681 update_stmt (stmt);
5683 if (dump_file && (dump_flags & TDF_DETAILS))
5685 fprintf (dump_file, " is now ");
5686 print_gimple_stmt (dump_file, stmt, 0);
5688 if (!binrhsisreassoc)
5689 return;
5691 /* We want to make it so the lhs is always the reassociative op,
5692 so swap. */
5693 std::swap (binlhs, binrhs);
5695 else if (binrhsisreassoc)
5697 linearize_expr (stmt);
5698 binlhs = gimple_assign_rhs1 (stmt);
5699 binrhs = gimple_assign_rhs2 (stmt);
5702 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5703 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5704 rhscode, loop));
5705 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5706 is_associative, set_visited);
5708 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5709 add_to_ops_vec (ops, binrhs);
5712 /* Repropagate the negates back into subtracts, since no other pass
5713 currently does it. */
5715 static void
5716 repropagate_negates (void)
5718 unsigned int i = 0;
5719 tree negate;
5721 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5723 gimple *user = get_single_immediate_use (negate);
5725 if (!user || !is_gimple_assign (user))
5726 continue;
5728 /* The negate operand can be either operand of a PLUS_EXPR
5729 (it can be the LHS if the RHS is a constant for example).
5731 Force the negate operand to the RHS of the PLUS_EXPR, then
5732 transform the PLUS_EXPR into a MINUS_EXPR. */
5733 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5735 /* If the negated operand appears on the LHS of the
5736 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5737 to force the negated operand to the RHS of the PLUS_EXPR. */
5738 if (gimple_assign_rhs1 (user) == negate)
5740 swap_ssa_operands (user,
5741 gimple_assign_rhs1_ptr (user),
5742 gimple_assign_rhs2_ptr (user));
5745 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5746 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5747 if (gimple_assign_rhs2 (user) == negate)
5749 tree rhs1 = gimple_assign_rhs1 (user);
5750 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5751 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5752 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5753 update_stmt (user);
5756 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5758 if (gimple_assign_rhs1 (user) == negate)
5760 /* We have
5761 x = -a
5762 y = x - b
5763 which we transform into
5764 x = a + b
5765 y = -x .
5766 This pushes down the negate which we possibly can merge
5767 into some other operation, hence insert it into the
5768 plus_negates vector. */
5769 gimple *feed = SSA_NAME_DEF_STMT (negate);
5770 tree a = gimple_assign_rhs1 (feed);
5771 tree b = gimple_assign_rhs2 (user);
5772 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5773 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5774 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5775 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5776 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5777 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5778 user = gsi_stmt (gsi2);
5779 update_stmt (user);
5780 reassoc_remove_stmt (&gsi);
5781 release_defs (feed);
5782 plus_negates.safe_push (gimple_assign_lhs (user));
5784 else
5786 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5787 rid of one operation. */
5788 gimple *feed = SSA_NAME_DEF_STMT (negate);
5789 tree a = gimple_assign_rhs1 (feed);
5790 tree rhs1 = gimple_assign_rhs1 (user);
5791 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5792 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5793 update_stmt (gsi_stmt (gsi));
5799 /* Returns true if OP is of a type for which we can do reassociation.
5800 That is for integral or non-saturating fixed-point types, and for
5801 floating point types when associative-math is enabled. */
5803 static bool
5804 can_reassociate_p (tree op)
5806 tree type = TREE_TYPE (op);
5807 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5808 return false;
5809 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5810 || NON_SAT_FIXED_POINT_TYPE_P (type)
5811 || (flag_associative_math && FLOAT_TYPE_P (type)))
5812 return true;
5813 return false;
5816 /* Break up subtract operations in block BB.
5818 We do this top down because we don't know whether the subtract is
5819 part of a possible chain of reassociation except at the top.
5821 IE given
5822 d = f + g
5823 c = a + e
5824 b = c - d
5825 q = b - r
5826 k = t - q
5828 we want to break up k = t - q, but we won't until we've transformed q
5829 = b - r, which won't be broken up until we transform b = c - d.
5831 En passant, clear the GIMPLE visited flag on every statement
5832 and set UIDs within each basic block. */
5834 static void
5835 break_up_subtract_bb (basic_block bb)
5837 gimple_stmt_iterator gsi;
5838 basic_block son;
5839 unsigned int uid = 1;
5841 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5843 gimple *stmt = gsi_stmt (gsi);
5844 gimple_set_visited (stmt, false);
5845 gimple_set_uid (stmt, uid++);
5847 if (!is_gimple_assign (stmt)
5848 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5849 continue;
5851 /* Look for simple gimple subtract operations. */
5852 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5854 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5855 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5856 continue;
5858 /* Check for a subtract used only in an addition. If this
5859 is the case, transform it into add of a negate for better
5860 reassociation. IE transform C = A-B into C = A + -B if C
5861 is only used in an addition. */
5862 if (should_break_up_subtract (stmt))
5863 break_up_subtract (stmt, &gsi);
5865 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5866 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5867 plus_negates.safe_push (gimple_assign_lhs (stmt));
5869 for (son = first_dom_son (CDI_DOMINATORS, bb);
5870 son;
5871 son = next_dom_son (CDI_DOMINATORS, son))
5872 break_up_subtract_bb (son);
5875 /* Used for repeated factor analysis. */
5876 struct repeat_factor
5878 /* An SSA name that occurs in a multiply chain. */
5879 tree factor;
5881 /* Cached rank of the factor. */
5882 unsigned rank;
5884 /* Number of occurrences of the factor in the chain. */
5885 HOST_WIDE_INT count;
5887 /* An SSA name representing the product of this factor and
5888 all factors appearing later in the repeated factor vector. */
5889 tree repr;
5893 static vec<repeat_factor> repeat_factor_vec;
5895 /* Used for sorting the repeat factor vector. Sort primarily by
5896 ascending occurrence count, secondarily by descending rank. */
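/* As a hypothetical example, {x: count 2, rank 5}, {y: count 3, rank 2},
   {z: count 3, rank 7} sorts to x, z, y: x comes first on its lower count,
   then z precedes y because of its higher rank.  The subtractions below
   assume the count and rank differences fit in an int, which holds for
   realistic multiply chains.  */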
5898 static int
5899 compare_repeat_factors (const void *x1, const void *x2)
5901 const repeat_factor *rf1 = (const repeat_factor *) x1;
5902 const repeat_factor *rf2 = (const repeat_factor *) x2;
5904 if (rf1->count != rf2->count)
5905 return rf1->count - rf2->count;
5907 return rf2->rank - rf1->rank;
5910 /* Look for repeated operands in OPS in the multiply tree rooted at
5911 STMT. Replace them with an optimal sequence of multiplies and powi
5912 builtin calls, and remove the used operands from OPS. Return an
5913 SSA name representing the value of the replacement sequence. */
5915 static tree
5916 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5918 unsigned i, j, vec_len;
5919 int ii;
5920 operand_entry *oe;
5921 repeat_factor *rf1, *rf2;
5922 repeat_factor rfnew;
5923 tree result = NULL_TREE;
5924 tree target_ssa, iter_result;
5925 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5926 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5927 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5928 gimple *mul_stmt, *pow_stmt;
5930 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5931 target. */
5932 if (!powi_fndecl)
5933 return NULL_TREE;
5935 /* Allocate the repeated factor vector. */
5936 repeat_factor_vec.create (10);
5938 /* Scan the OPS vector for all SSA names in the product and build
5939 up a vector of occurrence counts for each factor. */
5940 FOR_EACH_VEC_ELT (*ops, i, oe)
5942 if (TREE_CODE (oe->op) == SSA_NAME)
5944 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5946 if (rf1->factor == oe->op)
5948 rf1->count += oe->count;
5949 break;
5953 if (j >= repeat_factor_vec.length ())
5955 rfnew.factor = oe->op;
5956 rfnew.rank = oe->rank;
5957 rfnew.count = oe->count;
5958 rfnew.repr = NULL_TREE;
5959 repeat_factor_vec.safe_push (rfnew);
5964 /* Sort the repeated factor vector by (a) increasing occurrence count,
5965 and (b) decreasing rank. */
5966 repeat_factor_vec.qsort (compare_repeat_factors);
5968 /* It is generally best to combine as many base factors as possible
5969 into a product before applying __builtin_powi to the result.
5970 However, the sort order chosen for the repeated factor vector
5971 allows us to cache partial results for the product of the base
5972 factors for subsequent use. When we already have a cached partial
5973 result from a previous iteration, it is best to make use of it
5974 before looking for another __builtin_powi opportunity.
5976 As an example, consider x * x * y * y * y * z * z * z * z.
5977 We want to first compose the product x * y * z, raise it to the
5978 second power, then multiply this by y * z, and finally multiply
5979 by z. This can be done in 5 multiplies provided we cache y * z
5980 for use in both expressions:
5982 t1 = y * z
5983 t2 = t1 * x
5984 t3 = t2 * t2
5985 t4 = t1 * t3
5986 result = t4 * z
5988 If we instead ignored the cached y * z and first multiplied by
5989 the __builtin_powi opportunity z * z, we would get the inferior:
5991 t1 = y * z
5992 t2 = t1 * x
5993 t3 = t2 * t2
5994 t4 = z * z
5995 t5 = t3 * t4
5996 result = t5 * y */
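/* In terms of the GIMPLE emitted below, the five-multiply sequence above
   corresponds to something like (SSA names illustrative; the real
   temporaries are "reassocpow" names):

     t1 = y * z;
     t2 = t1 * x;
     t3 = __builtin_powi (t2, 2);
     t4 = t3 * t1;
     result = t4 * z;

   where the __builtin_powi call accounts for the remaining multiply
   t2 * t2.  */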
5998 vec_len = repeat_factor_vec.length ();
6000 /* Repeatedly look for opportunities to create a builtin_powi call. */
6001 while (true)
6003 HOST_WIDE_INT power;
6005 /* First look for the largest cached product of factors from
6006 preceding iterations. If found, create a builtin_powi for
6007 it if the minimum occurrence count for its factors is at
6008 least 2, or just use this cached product as our next
6009 multiplicand if the minimum occurrence count is 1. */
6010 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6012 if (rf1->repr && rf1->count > 0)
6013 break;
6016 if (j < vec_len)
6018 power = rf1->count;
6020 if (power == 1)
6022 iter_result = rf1->repr;
6024 if (dump_file && (dump_flags & TDF_DETAILS))
6026 unsigned elt;
6027 repeat_factor *rf;
6028 fputs ("Multiplying by cached product ", dump_file);
6029 for (elt = j; elt < vec_len; elt++)
6031 rf = &repeat_factor_vec[elt];
6032 print_generic_expr (dump_file, rf->factor);
6033 if (elt < vec_len - 1)
6034 fputs (" * ", dump_file);
6036 fputs ("\n", dump_file);
6039 else
6041 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6042 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6043 build_int_cst (integer_type_node,
6044 power));
6045 gimple_call_set_lhs (pow_stmt, iter_result);
6046 gimple_set_location (pow_stmt, gimple_location (stmt));
6047 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6048 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6050 if (dump_file && (dump_flags & TDF_DETAILS))
6052 unsigned elt;
6053 repeat_factor *rf;
6054 fputs ("Building __builtin_pow call for cached product (",
6055 dump_file);
6056 for (elt = j; elt < vec_len; elt++)
6058 rf = &repeat_factor_vec[elt];
6059 print_generic_expr (dump_file, rf->factor);
6060 if (elt < vec_len - 1)
6061 fputs (" * ", dump_file);
6063 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6064 power);
6068 else
6070 /* Otherwise, find the first factor in the repeated factor
6071 vector whose occurrence count is at least 2. If no such
6072 factor exists, there are no builtin_powi opportunities
6073 remaining. */
6074 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6076 if (rf1->count >= 2)
6077 break;
6080 if (j >= vec_len)
6081 break;
6083 power = rf1->count;
6085 if (dump_file && (dump_flags & TDF_DETAILS))
6087 unsigned elt;
6088 repeat_factor *rf;
6089 fputs ("Building __builtin_pow call for (", dump_file);
6090 for (elt = j; elt < vec_len; elt++)
6092 rf = &repeat_factor_vec[elt];
6093 print_generic_expr (dump_file, rf->factor);
6094 if (elt < vec_len - 1)
6095 fputs (" * ", dump_file);
6097 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6100 reassociate_stats.pows_created++;
6102 /* Visit each element of the vector in reverse order (so that
6103 high-occurrence elements are visited first, and within the
6104 same occurrence count, lower-ranked elements are visited
6105 first). Form a linear product of all elements in this order
6106 whose occurrence count is at least that of element J.
6107 Record the SSA name representing the product of each element
6108 with all subsequent elements in the vector. */
6109 if (j == vec_len - 1)
6110 rf1->repr = rf1->factor;
6111 else
6113 for (ii = vec_len - 2; ii >= (int)j; ii--)
6115 tree op1, op2;
6117 rf1 = &repeat_factor_vec[ii];
6118 rf2 = &repeat_factor_vec[ii + 1];
6120 /* Init the last factor's representative to be itself. */
6121 if (!rf2->repr)
6122 rf2->repr = rf2->factor;
6124 op1 = rf1->factor;
6125 op2 = rf2->repr;
6127 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6128 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6129 op1, op2);
6130 gimple_set_location (mul_stmt, gimple_location (stmt));
6131 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6132 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6133 rf1->repr = target_ssa;
6135 /* Don't reprocess the multiply we just introduced. */
6136 gimple_set_visited (mul_stmt, true);
6140 /* Form a call to __builtin_powi for the maximum product
6141 just formed, raised to the power obtained earlier. */
6142 rf1 = &repeat_factor_vec[j];
6143 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6144 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6145 build_int_cst (integer_type_node,
6146 power));
6147 gimple_call_set_lhs (pow_stmt, iter_result);
6148 gimple_set_location (pow_stmt, gimple_location (stmt));
6149 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6150 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6153 /* If we previously formed at least one other builtin_powi call,
6154 form the product of this one and those others. */
6155 if (result)
6157 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6158 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6159 result, iter_result);
6160 gimple_set_location (mul_stmt, gimple_location (stmt));
6161 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6162 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6163 gimple_set_visited (mul_stmt, true);
6164 result = new_result;
6166 else
6167 result = iter_result;
6169 /* Decrement the occurrence count of each element in the product
6170 by the count found above, and remove this many copies of each
6171 factor from OPS. */
6172 for (i = j; i < vec_len; i++)
6174 unsigned k = power;
6175 unsigned n;
6177 rf1 = &repeat_factor_vec[i];
6178 rf1->count -= power;
6180 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6182 if (oe->op == rf1->factor)
6184 if (oe->count <= k)
6186 ops->ordered_remove (n);
6187 k -= oe->count;
6189 if (k == 0)
6190 break;
6192 else
6194 oe->count -= k;
6195 break;
6202 /* At this point all elements in the repeated factor vector have a
6203 remaining occurrence count of 0 or 1, and those with a count of 1
6204 don't have cached representatives. Re-sort the ops vector and
6205 clean up. */
6206 ops->qsort (sort_by_operand_rank);
6207 repeat_factor_vec.release ();
6209 /* Return the final product computed herein. Note that there may
6210 still be some elements with single occurrence count left in OPS;
6211 those will be handled by the normal reassociation logic. */
6212 return result;
6215 /* Attempt to optimize
6216 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6217 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
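/* For instance (constants purely illustrative):
     2.5 * copysign (3.0, y)   ->  copysign (7.5, y)
     -2.5 * copysign (3.0, y)  ->  -copysign (-7.5, y)
   where the second form is realized as a copysign call followed by the
   negate statement inserted below.  */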
6219 static void
6220 attempt_builtin_copysign (vec<operand_entry *> *ops)
6222 operand_entry *oe;
6223 unsigned int i;
6224 unsigned int length = ops->length ();
6225 tree cst = ops->last ()->op;
6227 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6228 return;
6230 FOR_EACH_VEC_ELT (*ops, i, oe)
6232 if (TREE_CODE (oe->op) == SSA_NAME
6233 && has_single_use (oe->op))
6235 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6236 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6238 tree arg0, arg1;
6239 switch (gimple_call_combined_fn (old_call))
6241 CASE_CFN_COPYSIGN:
6242 CASE_CFN_COPYSIGN_FN:
6243 arg0 = gimple_call_arg (old_call, 0);
6244 arg1 = gimple_call_arg (old_call, 1);
6245 /* The first argument of copysign must be a constant,
6246 otherwise there's nothing to do. */
6247 if (TREE_CODE (arg0) == REAL_CST)
6249 tree type = TREE_TYPE (arg0);
6250 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6251 /* If we couldn't fold to a single constant, skip it.
6252 That happens e.g. for inexact multiplication when
6253 -frounding-math. */
6254 if (mul == NULL_TREE)
6255 break;
6256 /* Instead of adjusting OLD_CALL, let's build a new
6257 call to not leak the LHS and prevent keeping bogus
6258 debug statements. DCE will clean up the old call. */
6259 gcall *new_call;
6260 if (gimple_call_internal_p (old_call))
6261 new_call = gimple_build_call_internal
6262 (IFN_COPYSIGN, 2, mul, arg1);
6263 else
6264 new_call = gimple_build_call
6265 (gimple_call_fndecl (old_call), 2, mul, arg1);
6266 tree lhs = make_ssa_name (type);
6267 gimple_call_set_lhs (new_call, lhs);
6268 gimple_set_location (new_call,
6269 gimple_location (old_call));
6270 insert_stmt_after (new_call, old_call);
6271 /* We've used the constant, get rid of it. */
6272 ops->pop ();
6273 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6274 /* Handle the CST1 < 0 case by negating the result. */
6275 if (cst1_neg)
6277 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6278 gimple *negate_stmt
6279 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6280 insert_stmt_after (negate_stmt, new_call);
6281 oe->op = negrhs;
6283 else
6284 oe->op = lhs;
6285 if (dump_file && (dump_flags & TDF_DETAILS))
6287 fprintf (dump_file, "Optimizing copysign: ");
6288 print_generic_expr (dump_file, cst);
6289 fprintf (dump_file, " * COPYSIGN (");
6290 print_generic_expr (dump_file, arg0);
6291 fprintf (dump_file, ", ");
6292 print_generic_expr (dump_file, arg1);
6293 fprintf (dump_file, ") into %sCOPYSIGN (",
6294 cst1_neg ? "-" : "");
6295 print_generic_expr (dump_file, mul);
6296 fprintf (dump_file, ", ");
6297 print_generic_expr (dump_file, arg1);
6298 fprintf (dump_file, ")\n");
6300 return;
6302 break;
6303 default:
6304 break;
6311 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6313 static void
6314 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6316 tree rhs1;
6318 if (dump_file && (dump_flags & TDF_DETAILS))
6320 fprintf (dump_file, "Transforming ");
6321 print_gimple_stmt (dump_file, stmt, 0);
6324 rhs1 = gimple_assign_rhs1 (stmt);
6325 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6326 update_stmt (stmt);
6327 remove_visited_stmt_chain (rhs1);
6329 if (dump_file && (dump_flags & TDF_DETAILS))
6331 fprintf (dump_file, " into ");
6332 print_gimple_stmt (dump_file, stmt, 0);
6336 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6338 static void
6339 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6340 tree rhs1, tree rhs2)
6342 if (dump_file && (dump_flags & TDF_DETAILS))
6344 fprintf (dump_file, "Transforming ");
6345 print_gimple_stmt (dump_file, stmt, 0);
6348 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6349 update_stmt (gsi_stmt (*gsi));
6350 remove_visited_stmt_chain (rhs1);
6352 if (dump_file && (dump_flags & TDF_DETAILS))
6354 fprintf (dump_file, " into ");
6355 print_gimple_stmt (dump_file, stmt, 0);
6359 /* Reassociate expressions in basic block BB and its children in the
6360 post-dominator tree.
6362 Bubble up return status from maybe_optimize_range_tests. */
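/* A true return value means maybe_optimize_range_tests rewrote a
   GIMPLE_COND in this block or in one of the blocks visited recursively
   below it, so the caller should schedule a CFG cleanup (see
   execute_reassoc).  */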
6364 static bool
6365 reassociate_bb (basic_block bb)
6367 gimple_stmt_iterator gsi;
6368 basic_block son;
6369 gimple *stmt = last_stmt (bb);
6370 bool cfg_cleanup_needed = false;
6372 if (stmt && !gimple_visited_p (stmt))
6373 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6375 bool do_prev = false;
6376 for (gsi = gsi_last_bb (bb);
6377 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6379 do_prev = true;
6380 stmt = gsi_stmt (gsi);
6382 if (is_gimple_assign (stmt)
6383 && !stmt_could_throw_p (cfun, stmt))
6385 tree lhs, rhs1, rhs2;
6386 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6388 /* If this was part of an already processed statement,
6389 we don't need to touch it again. */
6390 if (gimple_visited_p (stmt))
6392 /* This statement might have become dead because of previous
6393 reassociations. */
6394 if (has_zero_uses (gimple_get_lhs (stmt)))
6396 reassoc_remove_stmt (&gsi);
6397 release_defs (stmt);
6398 /* We might end up removing the last stmt above which
6399 places the iterator to the end of the sequence.
6400 Reset it to the last stmt in this case and make sure
6401 we don't do gsi_prev in that case. */
6402 if (gsi_end_p (gsi))
6404 gsi = gsi_last_bb (bb);
6405 do_prev = false;
6408 continue;
6411 /* If this is not a gimple binary expression, there is
6412 nothing for us to do with it. */
6413 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6414 continue;
6416 lhs = gimple_assign_lhs (stmt);
6417 rhs1 = gimple_assign_rhs1 (stmt);
6418 rhs2 = gimple_assign_rhs2 (stmt);
6420 /* For non-bit or min/max operations we can't associate
6421 all types. Verify that here. */
6422 if (rhs_code != BIT_IOR_EXPR
6423 && rhs_code != BIT_AND_EXPR
6424 && rhs_code != BIT_XOR_EXPR
6425 && rhs_code != MIN_EXPR
6426 && rhs_code != MAX_EXPR
6427 && (!can_reassociate_p (lhs)
6428 || !can_reassociate_p (rhs1)
6429 || !can_reassociate_p (rhs2)))
6430 continue;
6432 if (associative_tree_code (rhs_code))
6434 auto_vec<operand_entry *> ops;
6435 tree powi_result = NULL_TREE;
6436 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6438 /* There may be no immediate uses left by the time we
6439 get here because we may have eliminated them all. */
6440 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6441 continue;
6443 gimple_set_visited (stmt, true);
6444 linearize_expr_tree (&ops, stmt, true, true);
6445 ops.qsort (sort_by_operand_rank);
6446 int orig_len = ops.length ();
6447 optimize_ops_list (rhs_code, &ops);
6448 if (undistribute_ops_list (rhs_code, &ops,
6449 loop_containing_stmt (stmt)))
6451 ops.qsort (sort_by_operand_rank);
6452 optimize_ops_list (rhs_code, &ops);
6454 if (undistribute_bitref_for_vector (rhs_code, &ops,
6455 loop_containing_stmt (stmt)))
6457 ops.qsort (sort_by_operand_rank);
6458 optimize_ops_list (rhs_code, &ops);
6460 if (rhs_code == PLUS_EXPR
6461 && transform_add_to_multiply (&ops))
6462 ops.qsort (sort_by_operand_rank);
6464 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6466 if (is_vector)
6467 optimize_vec_cond_expr (rhs_code, &ops);
6468 else
6469 optimize_range_tests (rhs_code, &ops, NULL);
6472 if (rhs_code == MULT_EXPR && !is_vector)
6474 attempt_builtin_copysign (&ops);
6476 if (reassoc_insert_powi_p
6477 && flag_unsafe_math_optimizations)
6478 powi_result = attempt_builtin_powi (stmt, &ops);
6481 operand_entry *last;
6482 bool negate_result = false;
6483 if (ops.length () > 1
6484 && rhs_code == MULT_EXPR)
6486 last = ops.last ();
6487 if ((integer_minus_onep (last->op)
6488 || real_minus_onep (last->op))
6489 && !HONOR_SNANS (TREE_TYPE (lhs))
6490 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6491 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6493 ops.pop ();
6494 negate_result = true;
6498 tree new_lhs = lhs;
6499 /* If the operand vector is now empty, all operands were
6500 consumed by the __builtin_powi optimization. */
6501 if (ops.length () == 0)
6502 transform_stmt_to_copy (&gsi, stmt, powi_result);
6503 else if (ops.length () == 1)
6505 tree last_op = ops.last ()->op;
6507 /* If the stmt that defines the operand has to be inserted, insert it
6508 before the use. */
6509 if (ops.last ()->stmt_to_insert)
6510 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6511 if (powi_result)
6512 transform_stmt_to_multiply (&gsi, stmt, last_op,
6513 powi_result);
6514 else
6515 transform_stmt_to_copy (&gsi, stmt, last_op);
6517 else
6519 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6520 int ops_num = ops.length ();
6521 int width;
6523 /* For binary bit operations, if there are at least 3
6524 operands and the last operand in OPS is a constant,
6525 move it to the front. This helps ensure that we generate
6526 (X & Y) & C rather than (X & C) & Y. The former will
6527 often match a canonical bit test when we get to RTL. */
6528 if (ops.length () > 2
6529 && (rhs_code == BIT_AND_EXPR
6530 || rhs_code == BIT_IOR_EXPR
6531 || rhs_code == BIT_XOR_EXPR)
6532 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6533 std::swap (*ops[0], *ops[ops_num - 1]);
6535 /* Only rewrite the expression tree to parallel in the
6536 last reassoc pass to avoid useless work back-and-forth
6537 with initial linearization. */
6538 if (!reassoc_insert_powi_p
6539 && ops.length () > 3
6540 && (width = get_reassociation_width (ops_num, rhs_code,
6541 mode)) > 1)
6543 if (dump_file && (dump_flags & TDF_DETAILS))
6544 fprintf (dump_file,
6545 "Width = %d was chosen for reassociation\n",
6546 width);
6547 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6548 width, ops);
6550 else
6552 /* When there are three operands left, we want
6553 to make sure the ones that get the double
6554 binary op are chosen wisely. */
6555 int len = ops.length ();
6556 if (len >= 3)
6557 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6559 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6560 powi_result != NULL
6561 || negate_result,
6562 len != orig_len);
6565 /* If we combined some repeated factors into a
6566 __builtin_powi call, multiply that result by the
6567 reassociated operands. */
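/* The statement currently defining LHS is renamed to define a fresh
   "reassocpow" temporary, and a multiply LHS = powi_result * temp is
   emitted after it, so existing uses of LHS keep seeing the complete
   product.  If rewrite_expr_tree handed back a different NEW_LHS, that
   name is used as the multiplicand instead.  */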
6568 if (powi_result)
6570 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6571 tree type = TREE_TYPE (lhs);
6572 tree target_ssa = make_temp_ssa_name (type, NULL,
6573 "reassocpow");
6574 gimple_set_lhs (lhs_stmt, target_ssa);
6575 update_stmt (lhs_stmt);
6576 if (lhs != new_lhs)
6578 target_ssa = new_lhs;
6579 new_lhs = lhs;
6581 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6582 powi_result, target_ssa);
6583 gimple_set_location (mul_stmt, gimple_location (stmt));
6584 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6585 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6589 if (negate_result)
6591 stmt = SSA_NAME_DEF_STMT (lhs);
6592 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6593 gimple_set_lhs (stmt, tmp);
6594 if (lhs != new_lhs)
6595 tmp = new_lhs;
6596 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6597 tmp);
6598 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6599 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6600 update_stmt (stmt);
6605 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6606 son;
6607 son = next_dom_son (CDI_POST_DOMINATORS, son))
6608 cfg_cleanup_needed |= reassociate_bb (son);
6610 return cfg_cleanup_needed;
6613 /* Add jumps around shifts for range tests turned into bit tests.
6614 For each SSA_NAME VAR we have code like:
6615 VAR = ...; // final stmt of range comparison
6616 // bit test here...;
6617 OTHERVAR = ...; // final stmt of the bit test sequence
6618 RES = VAR | OTHERVAR;
6619 Turn the above into:
6620 VAR = ...;
6621 if (VAR != 0)
6622 goto <l3>;
6623 else
6624 goto <l2>;
6625 <l2>:
6626 // bit test here...;
6627 OTHERVAR = ...;
6628 <l3>:
6629 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6631 static void
6632 branch_fixup (void)
6634 tree var;
6635 unsigned int i;
6637 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6639 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6640 gimple *use_stmt;
6641 use_operand_p use;
6642 bool ok = single_imm_use (var, &use, &use_stmt);
6643 gcc_assert (ok
6644 && is_gimple_assign (use_stmt)
6645 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6646 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6648 basic_block cond_bb = gimple_bb (def_stmt);
6649 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6650 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6652 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6653 gimple *g = gimple_build_cond (NE_EXPR, var,
6654 build_zero_cst (TREE_TYPE (var)),
6655 NULL_TREE, NULL_TREE);
6656 location_t loc = gimple_location (use_stmt);
6657 gimple_set_location (g, loc);
6658 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6660 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6661 etrue->probability = profile_probability::even ();
6662 edge efalse = find_edge (cond_bb, then_bb);
6663 efalse->flags = EDGE_FALSE_VALUE;
6664 efalse->probability -= etrue->probability;
6665 then_bb->count -= etrue->count ();
6667 tree othervar = NULL_TREE;
6668 if (gimple_assign_rhs1 (use_stmt) == var)
6669 othervar = gimple_assign_rhs2 (use_stmt);
6670 else if (gimple_assign_rhs2 (use_stmt) == var)
6671 othervar = gimple_assign_rhs1 (use_stmt);
6672 else
6673 gcc_unreachable ();
6674 tree lhs = gimple_assign_lhs (use_stmt);
6675 gphi *phi = create_phi_node (lhs, merge_bb);
6676 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6677 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6678 gsi = gsi_for_stmt (use_stmt);
6679 gsi_remove (&gsi, true);
6681 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6682 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6684 reassoc_branch_fixups.release ();
6687 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6688 void debug_ops_vector (vec<operand_entry *> ops);
6690 /* Dump the operand entry vector OPS to FILE. */
6692 void
6693 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6695 operand_entry *oe;
6696 unsigned int i;
6698 FOR_EACH_VEC_ELT (ops, i, oe)
6700 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6701 print_generic_expr (file, oe->op);
6702 fprintf (file, "\n");
6706 /* Dump the operand entry vector OPS to STDERR. */
6708 DEBUG_FUNCTION void
6709 debug_ops_vector (vec<operand_entry *> ops)
6711 dump_ops_vector (stderr, ops);
6714 /* Bubble up return status from reassociate_bb. */
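/* The subtract break-up walks the dominator tree top-down from the entry
   block, while reassociation proper walks the post-dominator tree from
   the exit block, visiting statements bottom-up.  */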
6716 static bool
6717 do_reassoc (void)
6719 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6720 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6723 /* Initialize the reassociation pass. */
6725 static void
6726 init_reassoc (void)
6728 int i;
6729 long rank = 2;
6730 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6732 /* Find the loops, so that we can prevent moving calculations in
6733 them. */
6734 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6736 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6738 next_operand_entry_id = 0;
6740 /* RPO (reverse post-order) will give us an ordering in which
6741 deeper loops come later. */
6742 pre_and_rev_post_order_compute (NULL, bbs, false);
6743 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6744 operand_rank = new hash_map<tree, long>;
6746 /* Give each default definition a distinct rank. This includes
6747 parameters and the static chain. Walk backwards over all
6748 SSA names so that we get proper rank ordering according
6749 to tree_swap_operands_p. */
6750 for (i = num_ssa_names - 1; i > 0; --i)
6752 tree name = ssa_name (i);
6753 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6754 insert_operand_rank (name, ++rank);
6757 /* Set up rank for each BB */
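/* The shift leaves the low 16 bits free, presumably so that ranks handed
   out to values computed inside a block (derived from the block's rank
   plus small increments) stay below the rank of any later block.  */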
6758 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6759 bb_rank[bbs[i]] = ++rank << 16;
6761 free (bbs);
6762 calculate_dominance_info (CDI_POST_DOMINATORS);
6763 plus_negates = vNULL;
6766 /* Clean up after the reassociation pass, and print stats if
6767 requested. */
6769 static void
6770 fini_reassoc (void)
6772 statistics_counter_event (cfun, "Linearized",
6773 reassociate_stats.linearized);
6774 statistics_counter_event (cfun, "Constants eliminated",
6775 reassociate_stats.constants_eliminated);
6776 statistics_counter_event (cfun, "Ops eliminated",
6777 reassociate_stats.ops_eliminated);
6778 statistics_counter_event (cfun, "Statements rewritten",
6779 reassociate_stats.rewritten);
6780 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6781 reassociate_stats.pows_encountered);
6782 statistics_counter_event (cfun, "Built-in powi calls created",
6783 reassociate_stats.pows_created);
6785 delete operand_rank;
6786 operand_entry_pool.release ();
6787 free (bb_rank);
6788 plus_negates.release ();
6789 free_dominance_info (CDI_POST_DOMINATORS);
6790 loop_optimizer_finalize ();
6793 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6794 insertion of __builtin_powi calls.
6796 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6797 optimization of a gimple conditional. Otherwise returns zero. */
6799 static unsigned int
6800 execute_reassoc (bool insert_powi_p)
6802 reassoc_insert_powi_p = insert_powi_p;
6804 init_reassoc ();
6806 bool cfg_cleanup_needed;
6807 cfg_cleanup_needed = do_reassoc ();
6808 repropagate_negates ();
6809 branch_fixup ();
6811 fini_reassoc ();
6812 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6815 namespace {
6817 const pass_data pass_data_reassoc =
6819 GIMPLE_PASS, /* type */
6820 "reassoc", /* name */
6821 OPTGROUP_NONE, /* optinfo_flags */
6822 TV_TREE_REASSOC, /* tv_id */
6823 ( PROP_cfg | PROP_ssa ), /* properties_required */
6824 0, /* properties_provided */
6825 0, /* properties_destroyed */
6826 0, /* todo_flags_start */
6827 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6830 class pass_reassoc : public gimple_opt_pass
6832 public:
6833 pass_reassoc (gcc::context *ctxt)
6834 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6837 /* opt_pass methods: */
6838 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6839 void set_pass_param (unsigned int n, bool param)
6841 gcc_assert (n == 0);
6842 insert_powi_p = param;
6844 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6845 virtual unsigned int execute (function *)
6846 { return execute_reassoc (insert_powi_p); }
6848 private:
6849 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6850 point 3a in the pass header comment. */
6851 bool insert_powi_p;
6852 }; // class pass_reassoc
6854 } // anon namespace
6856 gimple_opt_pass *
6857 make_pass_reassoc (gcc::context *ctxt)
6859 return new pass_reassoc (ctxt);