gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
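/* Illustrative sketch (editorial addition, not part of the original
   file): given source like

     x = (a - b) + (c + d);

   step 1 rewrites the subtraction as a + (-b), step 2 linearizes the
   tree into the operand list {a, -b, c, d}, and step 4 emits the
   operands again in rank order, e.g. ((a + -b) + c) + d, so that
   operands of equal rank end up adjacent for later passes.  */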
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e, 0);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied/whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id - oea->id;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if ((oeb->rank - oea->rank == 0)
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
518 versions of removed SSA_NAMEs, so if possible, prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
547 else
548 return oeb->id - oea->id;
551 if (oeb->rank != oea->rank)
552 return oeb->rank - oea->rank;
553 else
554 return oeb->id - oea->id;
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is a reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
608 return true;
610 return false;
614 /* Return true if STMT is a nop-conversion. */
616 static bool
617 gimple_nop_conversion_p (gimple *stmt)
619 if (gassign *ass = dyn_cast <gassign *> (stmt))
621 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
622 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
623 TREE_TYPE (gimple_assign_rhs1 (ass))))
624 return true;
626 return false;
629 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
630 operand of that operation. Otherwise, return NULL. */
632 static tree
633 get_unary_op (tree name, enum tree_code opcode)
635 gimple *stmt = SSA_NAME_DEF_STMT (name);
637 /* Look through nop conversions (sign changes). */
638 if (gimple_nop_conversion_p (stmt)
639 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
640 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
642 if (!is_gimple_assign (stmt))
643 return NULL_TREE;
645 if (gimple_assign_rhs_code (stmt) == opcode)
646 return gimple_assign_rhs1 (stmt);
647 return NULL_TREE;
650 /* Return true if OP1 and OP2 have the same value when cast to either type. */
652 static bool
653 ops_equal_values_p (tree op1, tree op2)
655 if (op1 == op2)
656 return true;
658 tree orig_op1 = op1;
659 if (TREE_CODE (op1) == SSA_NAME)
661 gimple *stmt = SSA_NAME_DEF_STMT (op1);
662 if (gimple_nop_conversion_p (stmt))
664 op1 = gimple_assign_rhs1 (stmt);
665 if (op1 == op2)
666 return true;
670 if (TREE_CODE (op2) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op2);
673 if (gimple_nop_conversion_p (stmt))
675 op2 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2
677 || orig_op1 == op2)
678 return true;
682 return false;
686 /* If CURR and LAST are a pair of ops that OPCODE allows us to
687 eliminate through equivalences, do so, remove them from OPS, and
688 return true. Otherwise, return false. */
690 static bool
691 eliminate_duplicate_pair (enum tree_code opcode,
692 vec<operand_entry *> *ops,
693 bool *all_done,
694 unsigned int i,
695 operand_entry *curr,
696 operand_entry *last)
699 /* If we have two of the same op, and the opcode is & |, min, or max,
700 we can eliminate one of them.
701 If we have two of the same op, and the opcode is ^, we can
702 eliminate both of them. */
704 if (last && last->op == curr->op)
706 switch (opcode)
708 case MAX_EXPR:
709 case MIN_EXPR:
710 case BIT_IOR_EXPR:
711 case BIT_AND_EXPR:
712 if (dump_file && (dump_flags & TDF_DETAILS))
714 fprintf (dump_file, "Equivalence: ");
715 print_generic_expr (dump_file, curr->op, 0);
716 fprintf (dump_file, " [&|minmax] ");
717 print_generic_expr (dump_file, last->op, 0);
718 fprintf (dump_file, " -> ");
719 print_generic_stmt (dump_file, last->op, 0);
722 ops->ordered_remove (i);
723 reassociate_stats.ops_eliminated ++;
725 return true;
727 case BIT_XOR_EXPR:
728 if (dump_file && (dump_flags & TDF_DETAILS))
730 fprintf (dump_file, "Equivalence: ");
731 print_generic_expr (dump_file, curr->op, 0);
732 fprintf (dump_file, " ^ ");
733 print_generic_expr (dump_file, last->op, 0);
734 fprintf (dump_file, " -> nothing\n");
737 reassociate_stats.ops_eliminated += 2;
739 if (ops->length () == 2)
741 ops->truncate (0);
742 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
743 *all_done = true;
745 else
747 ops->ordered_remove (i-1);
748 ops->ordered_remove (i-1);
751 return true;
753 default:
754 break;
757 return false;
760 static vec<tree> plus_negates;
762 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
763 expression, look in OPS for a corresponding positive operation to cancel
764 it out. If we find one, remove the other from OPS, replace
765 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
766 return false. */
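/* Illustrative note (editorial, not from the original sources): under
   PLUS_EXPR the pair {a, -a} collapses to 0 and the pair {a, ~a}
   collapses to -1 (all ones), since ~a == -a - 1 in two's complement
   arithmetic.  */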
768 static bool
769 eliminate_plus_minus_pair (enum tree_code opcode,
770 vec<operand_entry *> *ops,
771 unsigned int currindex,
772 operand_entry *curr)
774 tree negateop;
775 tree notop;
776 unsigned int i;
777 operand_entry *oe;
779 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
780 return false;
782 negateop = get_unary_op (curr->op, NEGATE_EXPR);
783 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
784 if (negateop == NULL_TREE && notop == NULL_TREE)
785 return false;
787 /* Any non-negated version will have a rank that is one less than
788 the current rank. So once we hit those ranks, if we don't find
789 one, we can stop. */
791 for (i = currindex + 1;
792 ops->iterate (i, &oe)
793 && oe->rank >= curr->rank - 1 ;
794 i++)
796 if (negateop
797 && ops_equal_values_p (oe->op, negateop))
799 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "Equivalence: ");
802 print_generic_expr (dump_file, negateop, 0);
803 fprintf (dump_file, " + -");
804 print_generic_expr (dump_file, oe->op, 0);
805 fprintf (dump_file, " -> 0\n");
808 ops->ordered_remove (i);
809 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
810 ops->ordered_remove (currindex);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 else if (notop
816 && ops_equal_values_p (oe->op, notop))
818 tree op_type = TREE_TYPE (oe->op);
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, notop, 0);
824 fprintf (dump_file, " + ~");
825 print_generic_expr (dump_file, oe->op, 0);
826 fprintf (dump_file, " -> -1\n");
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_all_ones_cst (op_type));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
834 return true;
838 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
839 save it for later inspection in repropagate_negates(). */
840 if (negateop != NULL_TREE
841 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
842 plus_negates.safe_push (curr->op);
844 return false;
847 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
848 bitwise not expression, look in OPS for a corresponding operand to
849 cancel it out. If we find one, remove the other from OPS, replace
850 OPS[CURRINDEX] with 0, and return true. Otherwise, return
851 false. */
853 static bool
854 eliminate_not_pairs (enum tree_code opcode,
855 vec<operand_entry *> *ops,
856 unsigned int currindex,
857 operand_entry *curr)
859 tree notop;
860 unsigned int i;
861 operand_entry *oe;
863 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864 || TREE_CODE (curr->op) != SSA_NAME)
865 return false;
867 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868 if (notop == NULL_TREE)
869 return false;
871 /* Any non-not version will have a rank that is one less than
872 the current rank. So once we hit those ranks, if we don't find
873 one, we can stop. */
875 for (i = currindex + 1;
876 ops->iterate (i, &oe)
877 && oe->rank >= curr->rank - 1;
878 i++)
880 if (oe->op == notop)
882 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "Equivalence: ");
885 print_generic_expr (dump_file, notop, 0);
886 if (opcode == BIT_AND_EXPR)
887 fprintf (dump_file, " & ~");
888 else if (opcode == BIT_IOR_EXPR)
889 fprintf (dump_file, " | ~");
890 print_generic_expr (dump_file, oe->op, 0);
891 if (opcode == BIT_AND_EXPR)
892 fprintf (dump_file, " -> 0\n");
893 else if (opcode == BIT_IOR_EXPR)
894 fprintf (dump_file, " -> -1\n");
897 if (opcode == BIT_AND_EXPR)
898 oe->op = build_zero_cst (TREE_TYPE (oe->op));
899 else if (opcode == BIT_IOR_EXPR)
900 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
902 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oe);
905 return true;
909 return false;
912 /* Use constant value that may be present in OPS to try to eliminate
913 operands. Note that this function is only really used when we've
914 eliminated ops for other reasons, or merged constants. Across
915 single statements, fold already does all of this, plus more. There
916 is little point in duplicating logic, so I've only included the
917 identities that I could ever construct testcases to trigger. */
919 static void
920 eliminate_using_constants (enum tree_code opcode,
921 vec<operand_entry *> *ops)
923 operand_entry *oelast = ops->last ();
924 tree type = TREE_TYPE (oelast->op);
926 if (oelast->rank == 0
927 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
929 switch (opcode)
931 case BIT_AND_EXPR:
932 if (integer_zerop (oelast->op))
934 if (ops->length () != 1)
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found & 0, removing all other ops\n");
939 reassociate_stats.ops_eliminated += ops->length () - 1;
941 ops->truncate (0);
942 ops->quick_push (oelast);
943 return;
946 else if (integer_all_onesp (oelast->op))
948 if (ops->length () != 1)
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & -1, removing\n");
952 ops->pop ();
953 reassociate_stats.ops_eliminated++;
956 break;
957 case BIT_IOR_EXPR:
958 if (integer_all_onesp (oelast->op))
960 if (ops->length () != 1)
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "Found | -1, removing all other ops\n");
965 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (0);
968 ops->quick_push (oelast);
969 return;
972 else if (integer_zerop (oelast->op))
974 if (ops->length () != 1)
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | 0, removing\n");
978 ops->pop ();
979 reassociate_stats.ops_eliminated++;
982 break;
983 case MULT_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_NANS (type)
987 && !HONOR_SIGNED_ZEROS (type)
988 && real_zerop (oelast->op)))
990 if (ops->length () != 1)
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found * 0, removing all other ops\n");
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (1);
997 ops->quick_push (oelast);
998 return;
1001 else if (integer_onep (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && !HONOR_SNANS (type)
1004 && real_onep (oelast->op)))
1006 if (ops->length () != 1)
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 1, removing\n");
1010 ops->pop ();
1011 reassociate_stats.ops_eliminated++;
1012 return;
1015 break;
1016 case BIT_XOR_EXPR:
1017 case PLUS_EXPR:
1018 case MINUS_EXPR:
1019 if (integer_zerop (oelast->op)
1020 || (FLOAT_TYPE_P (type)
1021 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022 && fold_real_zero_addition_p (type, oelast->op,
1023 opcode == MINUS_EXPR)))
1025 if (ops->length () != 1)
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "Found [|^+] 0, removing\n");
1029 ops->pop ();
1030 reassociate_stats.ops_eliminated++;
1031 return;
1034 break;
1035 default:
1036 break;
1042 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1043 bool, bool);
1045 /* Structure for tracking and counting operands. */
1046 struct oecount {
1047 int cnt;
1048 int id;
1049 enum tree_code oecode;
1050 tree op;
1054 /* The heap for the oecount hashtable and the sorted list of operands. */
1055 static vec<oecount> cvec;
1058 /* Oecount hashtable helpers. */
1060 struct oecount_hasher : int_hash <int, 0, 1>
1062 static inline hashval_t hash (int);
1063 static inline bool equal (int, int);
1066 /* Hash function for oecount. */
1068 inline hashval_t
1069 oecount_hasher::hash (int p)
1071 const oecount *c = &cvec[p - 42];
1072 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1075 /* Comparison function for oecount. */
1077 inline bool
1078 oecount_hasher::equal (int p1, int p2)
1080 const oecount *c1 = &cvec[p1 - 42];
1081 const oecount *c2 = &cvec[p2 - 42];
1082 return (c1->oecode == c2->oecode
1083 && c1->op == c2->op);
1086 /* Comparison function for qsort sorting oecount elements by count. */
1088 static int
1089 oecount_cmp (const void *p1, const void *p2)
1091 const oecount *c1 = (const oecount *)p1;
1092 const oecount *c2 = (const oecount *)p2;
1093 if (c1->cnt != c2->cnt)
1094 return c1->cnt - c2->cnt;
1095 else
1096 /* If counts are identical, use unique IDs to stabilize qsort. */
1097 return c1->id - c2->id;
1100 /* Return TRUE iff STMT represents a builtin call that raises OP
1101 to some exponent. */
1103 static bool
1104 stmt_is_power_of_op (gimple *stmt, tree op)
1106 if (!is_gimple_call (stmt))
1107 return false;
1109 switch (gimple_call_combined_fn (stmt))
1111 CASE_CFN_POW:
1112 CASE_CFN_POWI:
1113 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1115 default:
1116 return false;
1120 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1121 in place and return the result. Assumes that stmt_is_power_of_op
1122 was previously called for STMT and returned TRUE. */
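/* For instance (editorial example, not part of the original sources):
   for __builtin_pow (x, 3.0) the exponent argument is rewritten in
   place to 2.0 and 2 is returned; for __builtin_powi (x, 5) the call
   becomes __builtin_powi (x, 4) and 4 is returned.  */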
1124 static HOST_WIDE_INT
1125 decrement_power (gimple *stmt)
1127 REAL_VALUE_TYPE c, cint;
1128 HOST_WIDE_INT power;
1129 tree arg1;
1131 switch (gimple_call_combined_fn (stmt))
1133 CASE_CFN_POW:
1134 arg1 = gimple_call_arg (stmt, 1);
1135 c = TREE_REAL_CST (arg1);
1136 power = real_to_integer (&c) - 1;
1137 real_from_integer (&cint, VOIDmode, power, SIGNED);
1138 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1139 return power;
1141 CASE_CFN_POWI:
1142 arg1 = gimple_call_arg (stmt, 1);
1143 power = TREE_INT_CST_LOW (arg1) - 1;
1144 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1145 return power;
1147 default:
1148 gcc_unreachable ();
1152 /* Replace the SSA name defined by STMT with a new SSA name and replace
1153 all its uses with the new name. Also return the new SSA name. */
1155 static tree
1156 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1158 gimple *use_stmt;
1159 use_operand_p use;
1160 imm_use_iterator iter;
1161 tree new_lhs, new_debug_lhs = NULL_TREE;
1162 tree lhs = gimple_get_lhs (stmt);
1164 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1165 gimple_set_lhs (stmt, new_lhs);
1167 /* Also need to update GIMPLE_DEBUGs. */
1168 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1170 tree repl = new_lhs;
1171 if (is_gimple_debug (use_stmt))
1173 if (new_debug_lhs == NULL_TREE)
1175 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1176 gdebug *def_temp
1177 = gimple_build_debug_bind (new_debug_lhs,
1178 build2 (opcode, TREE_TYPE (lhs),
1179 new_lhs, op),
1180 stmt);
1181 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1182 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1183 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1184 gimple_set_uid (def_temp, gimple_uid (stmt));
1185 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1186 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1188 repl = new_debug_lhs;
1190 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1191 SET_USE (use, repl);
1192 update_stmt (use_stmt);
1194 return new_lhs;
1197 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1198 uses with new SSAs. Also do this for the stmt that defines DEF
1199 if *DEF is not OP. */
1201 static void
1202 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1203 vec<gimple *> &stmts_to_fix)
1205 unsigned i;
1206 gimple *stmt;
1208 if (*def != op
1209 && TREE_CODE (*def) == SSA_NAME
1210 && (stmt = SSA_NAME_DEF_STMT (*def))
1211 && gimple_code (stmt) != GIMPLE_NOP)
1212 *def = make_new_ssa_for_def (stmt, opcode, op);
1214 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1215 make_new_ssa_for_def (stmt, opcode, op);
1218 /* Find the single immediate use of STMT's LHS, and replace it
1219 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1220 replace *DEF with OP as well. */
1222 static void
1223 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1225 tree lhs;
1226 gimple *use_stmt;
1227 use_operand_p use;
1228 gimple_stmt_iterator gsi;
1230 if (is_gimple_call (stmt))
1231 lhs = gimple_call_lhs (stmt);
1232 else
1233 lhs = gimple_assign_lhs (stmt);
1235 gcc_assert (has_single_use (lhs));
1236 single_imm_use (lhs, &use, &use_stmt);
1237 if (lhs == *def)
1238 *def = op;
1239 SET_USE (use, op);
1240 if (TREE_CODE (op) != SSA_NAME)
1241 update_stmt (use_stmt);
1242 gsi = gsi_for_stmt (stmt);
1243 unlink_stmt_vdef (stmt);
1244 reassoc_remove_stmt (&gsi);
1245 release_defs (stmt);
1248 /* Walks the linear chain with result *DEF searching for an operation
1249 with operand OP and code OPCODE removing that from the chain. *DEF
1250 is updated if there is only one operand but no operation left. */
1252 static void
1253 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1255 tree orig_def = *def;
1256 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1257 /* PR72835 - Record the stmt chain that has to be updated such that
1258 we don't use the same LHS when the values computed are different. */
1259 auto_vec<gimple *, 64> stmts_to_fix;
1263 tree name;
1265 if (opcode == MULT_EXPR)
1267 if (stmt_is_power_of_op (stmt, op))
1269 if (decrement_power (stmt) == 1)
1271 if (stmts_to_fix.length () > 0)
1272 stmts_to_fix.pop ();
1273 propagate_op_to_single_use (op, stmt, def);
1275 break;
1277 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1279 if (gimple_assign_rhs1 (stmt) == op)
1281 tree cst = build_minus_one_cst (TREE_TYPE (op));
1282 if (stmts_to_fix.length () > 0)
1283 stmts_to_fix.pop ();
1284 propagate_op_to_single_use (cst, stmt, def);
1285 break;
1287 else if (integer_minus_onep (op)
1288 || real_minus_onep (op))
1290 gimple_assign_set_rhs_code
1291 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1292 break;
1297 name = gimple_assign_rhs1 (stmt);
1299 /* If this is the operation we look for and one of the operands
1300 is ours simply propagate the other operand into the stmts
1301 single use. */
1302 if (gimple_assign_rhs_code (stmt) == opcode
1303 && (name == op
1304 || gimple_assign_rhs2 (stmt) == op))
1306 if (name == op)
1307 name = gimple_assign_rhs2 (stmt);
1308 if (stmts_to_fix.length () > 0)
1309 stmts_to_fix.pop ();
1310 propagate_op_to_single_use (name, stmt, def);
1311 break;
1314 /* We might have a multiply of two __builtin_pow* calls, and
1315 the operand might be hiding in the rightmost one. Likewise
1316 this can happen for a negate. */
1317 if (opcode == MULT_EXPR
1318 && gimple_assign_rhs_code (stmt) == opcode
1319 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1320 && has_single_use (gimple_assign_rhs2 (stmt)))
1322 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1323 if (stmt_is_power_of_op (stmt2, op))
1325 if (decrement_power (stmt2) == 1)
1326 propagate_op_to_single_use (op, stmt2, def);
1327 else
1328 stmts_to_fix.safe_push (stmt2);
1329 break;
1331 else if (is_gimple_assign (stmt2)
1332 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1334 if (gimple_assign_rhs1 (stmt2) == op)
1336 tree cst = build_minus_one_cst (TREE_TYPE (op));
1337 propagate_op_to_single_use (cst, stmt2, def);
1338 break;
1340 else if (integer_minus_onep (op)
1341 || real_minus_onep (op))
1343 stmts_to_fix.safe_push (stmt2);
1344 gimple_assign_set_rhs_code
1345 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1346 break;
1351 /* Continue walking the chain. */
1352 gcc_assert (name != op
1353 && TREE_CODE (name) == SSA_NAME);
1354 stmt = SSA_NAME_DEF_STMT (name);
1355 stmts_to_fix.safe_push (stmt);
1357 while (1);
1359 if (stmts_to_fix.length () > 0 || *def == orig_def)
1360 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1363 /* Returns true if statement S1 dominates statement S2. Like
1364 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1366 static bool
1367 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1369 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1371 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1372 SSA_NAME. Assume it lives at the beginning of function and
1373 thus dominates everything. */
1374 if (!bb1 || s1 == s2)
1375 return true;
1377 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1378 if (!bb2)
1379 return false;
1381 if (bb1 == bb2)
1383 /* PHIs in the same basic block are assumed to be
1384 executed all in parallel; if only one stmt is a PHI,
1385 it dominates the other stmt in the same basic block. */
1386 if (gimple_code (s1) == GIMPLE_PHI)
1387 return true;
1389 if (gimple_code (s2) == GIMPLE_PHI)
1390 return false;
1392 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1394 if (gimple_uid (s1) < gimple_uid (s2))
1395 return true;
1397 if (gimple_uid (s1) > gimple_uid (s2))
1398 return false;
1400 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1401 unsigned int uid = gimple_uid (s1);
1402 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1404 gimple *s = gsi_stmt (gsi);
1405 if (gimple_uid (s) != uid)
1406 break;
1407 if (s == s2)
1408 return true;
1411 return false;
1414 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1417 /* Insert STMT after INSERT_POINT. */
1419 static void
1420 insert_stmt_after (gimple *stmt, gimple *insert_point)
1422 gimple_stmt_iterator gsi;
1423 basic_block bb;
1425 if (gimple_code (insert_point) == GIMPLE_PHI)
1426 bb = gimple_bb (insert_point);
1427 else if (!stmt_ends_bb_p (insert_point))
1429 gsi = gsi_for_stmt (insert_point);
1430 gimple_set_uid (stmt, gimple_uid (insert_point));
1431 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1432 return;
1434 else
1435 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1436 thus if it must end a basic block, it should be a call that can
1437 throw, or some assignment that can throw. If it throws, the LHS
1438 of it will not be initialized though, so only valid places using
1439 the SSA_NAME should be dominated by the fallthru edge. */
1440 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1441 gsi = gsi_after_labels (bb);
1442 if (gsi_end_p (gsi))
1444 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1445 gimple_set_uid (stmt,
1446 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1448 else
1449 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1450 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1453 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1454 name for the result. Places the statement after the definition of either
1455 OP1 or OP2. Returns the new statement. */
1457 static gimple *
1458 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1460 gimple *op1def = NULL, *op2def = NULL;
1461 gimple_stmt_iterator gsi;
1462 tree op;
1463 gassign *sum;
1465 /* Create the addition statement. */
1466 op = make_ssa_name (type);
1467 sum = gimple_build_assign (op, opcode, op1, op2);
1469 /* Find an insertion place and insert. */
1470 if (TREE_CODE (op1) == SSA_NAME)
1471 op1def = SSA_NAME_DEF_STMT (op1);
1472 if (TREE_CODE (op2) == SSA_NAME)
1473 op2def = SSA_NAME_DEF_STMT (op2);
1474 if ((!op1def || gimple_nop_p (op1def))
1475 && (!op2def || gimple_nop_p (op2def)))
1477 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1478 if (gsi_end_p (gsi))
1480 gimple_stmt_iterator gsi2
1481 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1482 gimple_set_uid (sum,
1483 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1485 else
1486 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1487 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1489 else
1491 gimple *insert_point;
1492 if ((!op1def || gimple_nop_p (op1def))
1493 || (op2def && !gimple_nop_p (op2def)
1494 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1495 insert_point = op2def;
1496 else
1497 insert_point = op1def;
1498 insert_stmt_after (sum, insert_point);
1500 update_stmt (sum);
1502 return sum;
1505 /* Perform un-distribution of divisions and multiplications.
1506 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1507 to (A + B) / X for real X.
1509 The algorithm is organized as follows.
1511 - First we walk the addition chain *OPS looking for summands that
1512 are defined by a multiplication or a real division. This results
1513 in the candidates bitmap with relevant indices into *OPS.
1515 - Second we build the chains of multiplications or divisions for
1516 these candidates, counting the number of occurrences of (operand, code)
1517 pairs in all of the candidates chains.
1519 - Third we sort the (operand, code) pairs by number of occurrence and
1520 process them starting with the pair with the most uses.
1522 * For each such pair we walk the candidates again to build a
1523 second candidate bitmap noting all multiplication/division chains
1524 that have at least one occurrence of (operand, code).
1526 * We build an alternate addition chain only covering these
1527 candidates with one (operand, code) operation removed from their
1528 multiplication/division chain.
1530 * The first candidate gets replaced by the alternate addition chain
1531 multiplied/divided by the operand.
1533 * All candidate chains get disabled for further processing and
1534 processing of (operand, code) pairs continues.
1536 The alternate addition chains built are re-processed by the main
1537 reassociation algorithm which allows optimizing a * x * y + b * y * x
1538 to (a + b) * x * y in one invocation of the reassociation pass. */
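/* Editorial walk-through (illustrative, not from the original sources):
   for the addition chain {a * x, b * x, c} the candidates bitmap marks
   the first two entries, the counting table records (x, MULT_EXPR) with
   count 2, and the rewrite produces (a + b) * x, leaving the chain as
   {(a + b) * x, 0, c} for the rest of the pass to finish.  */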
1540 static bool
1541 undistribute_ops_list (enum tree_code opcode,
1542 vec<operand_entry *> *ops, struct loop *loop)
1544 unsigned int length = ops->length ();
1545 operand_entry *oe1;
1546 unsigned i, j;
1547 unsigned nr_candidates, nr_candidates2;
1548 sbitmap_iterator sbi0;
1549 vec<operand_entry *> *subops;
1550 bool changed = false;
1551 int next_oecount_id = 0;
1553 if (length <= 1
1554 || opcode != PLUS_EXPR)
1555 return false;
1557 /* Build a list of candidates to process. */
1558 auto_sbitmap candidates (length);
1559 bitmap_clear (candidates);
1560 nr_candidates = 0;
1561 FOR_EACH_VEC_ELT (*ops, i, oe1)
1563 enum tree_code dcode;
1564 gimple *oe1def;
1566 if (TREE_CODE (oe1->op) != SSA_NAME)
1567 continue;
1568 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1569 if (!is_gimple_assign (oe1def))
1570 continue;
1571 dcode = gimple_assign_rhs_code (oe1def);
1572 if ((dcode != MULT_EXPR
1573 && dcode != RDIV_EXPR)
1574 || !is_reassociable_op (oe1def, dcode, loop))
1575 continue;
1577 bitmap_set_bit (candidates, i);
1578 nr_candidates++;
1581 if (nr_candidates < 2)
1582 return false;
1584 if (dump_file && (dump_flags & TDF_DETAILS))
1586 fprintf (dump_file, "searching for un-distribute opportunities ");
1587 print_generic_expr (dump_file,
1588 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1589 fprintf (dump_file, " %d\n", nr_candidates);
1592 /* Build linearized sub-operand lists and the counting table. */
1593 cvec.create (0);
1595 hash_table<oecount_hasher> ctable (15);
1597 /* ??? Macro arguments cannot have multi-argument template types in
1598 them. This typedef is needed to work around that limitation. */
1599 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1600 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1601 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1603 gimple *oedef;
1604 enum tree_code oecode;
1605 unsigned j;
1607 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1608 oecode = gimple_assign_rhs_code (oedef);
1609 linearize_expr_tree (&subops[i], oedef,
1610 associative_tree_code (oecode), false);
1612 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1614 oecount c;
1615 int *slot;
1616 int idx;
1617 c.oecode = oecode;
1618 c.cnt = 1;
1619 c.id = next_oecount_id++;
1620 c.op = oe1->op;
1621 cvec.safe_push (c);
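/* Editorial note: the value stored in the hash table is the cvec index
   of the entry just pushed, biased by 42 (idx below is index + 42);
   oecount_hasher::hash and ::equal subtract 42 again.  The bias keeps
   stored values away from 0 and 1, which int_hash reserves for the
   empty and deleted markers.  */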
1622 idx = cvec.length () + 41;
1623 slot = ctable.find_slot (idx, INSERT);
1624 if (!*slot)
1626 *slot = idx;
1628 else
1630 cvec.pop ();
1631 cvec[*slot - 42].cnt++;
1636 /* Sort the counting table. */
1637 cvec.qsort (oecount_cmp);
1639 if (dump_file && (dump_flags & TDF_DETAILS))
1641 oecount *c;
1642 fprintf (dump_file, "Candidates:\n");
1643 FOR_EACH_VEC_ELT (cvec, j, c)
1645 fprintf (dump_file, " %u %s: ", c->cnt,
1646 c->oecode == MULT_EXPR
1647 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1648 print_generic_expr (dump_file, c->op, 0);
1649 fprintf (dump_file, "\n");
1653 /* Process the (operand, code) pairs in order of most occurrence. */
1654 auto_sbitmap candidates2 (length);
1655 while (!cvec.is_empty ())
1657 oecount *c = &cvec.last ();
1658 if (c->cnt < 2)
1659 break;
1661 /* Now collect the operands in the outer chain that contain
1662 the common operand in their inner chain. */
1663 bitmap_clear (candidates2);
1664 nr_candidates2 = 0;
1665 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1667 gimple *oedef;
1668 enum tree_code oecode;
1669 unsigned j;
1670 tree op = (*ops)[i]->op;
1672 /* If we undistributed in this chain already this may be
1673 a constant. */
1674 if (TREE_CODE (op) != SSA_NAME)
1675 continue;
1677 oedef = SSA_NAME_DEF_STMT (op);
1678 oecode = gimple_assign_rhs_code (oedef);
1679 if (oecode != c->oecode)
1680 continue;
1682 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1684 if (oe1->op == c->op)
1686 bitmap_set_bit (candidates2, i);
1687 ++nr_candidates2;
1688 break;
1693 if (nr_candidates2 >= 2)
1695 operand_entry *oe1, *oe2;
1696 gimple *prod;
1697 int first = bitmap_first_set_bit (candidates2);
1699 /* Build the new addition chain. */
1700 oe1 = (*ops)[first];
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Building (");
1704 print_generic_expr (dump_file, oe1->op, 0);
1706 zero_one_operation (&oe1->op, c->oecode, c->op);
1707 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1709 gimple *sum;
1710 oe2 = (*ops)[i];
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, " + ");
1714 print_generic_expr (dump_file, oe2->op, 0);
1716 zero_one_operation (&oe2->op, c->oecode, c->op);
1717 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1718 oe1->op, oe2->op, opcode);
1719 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1720 oe2->rank = 0;
1721 oe1->op = gimple_get_lhs (sum);
1724 /* Apply the multiplication/division. */
1725 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1726 oe1->op, c->op, c->oecode);
1727 if (dump_file && (dump_flags & TDF_DETAILS))
1729 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1730 print_generic_expr (dump_file, c->op, 0);
1731 fprintf (dump_file, "\n");
1734 /* Record it in the addition chain and disable further
1735 undistribution with this op. */
1736 oe1->op = gimple_assign_lhs (prod);
1737 oe1->rank = get_rank (oe1->op);
1738 subops[first].release ();
1740 changed = true;
1743 cvec.pop ();
1746 for (i = 0; i < ops->length (); ++i)
1747 subops[i].release ();
1748 free (subops);
1749 cvec.release ();
1751 return changed;
1754 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1755 expression, examine the other OPS to see if any of them are comparisons
1756 of the same values, which we may be able to combine or eliminate.
1757 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1759 static bool
1760 eliminate_redundant_comparison (enum tree_code opcode,
1761 vec<operand_entry *> *ops,
1762 unsigned int currindex,
1763 operand_entry *curr)
1765 tree op1, op2;
1766 enum tree_code lcode, rcode;
1767 gimple *def1, *def2;
1768 int i;
1769 operand_entry *oe;
1771 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1772 return false;
1774 /* Check that CURR is a comparison. */
1775 if (TREE_CODE (curr->op) != SSA_NAME)
1776 return false;
1777 def1 = SSA_NAME_DEF_STMT (curr->op);
1778 if (!is_gimple_assign (def1))
1779 return false;
1780 lcode = gimple_assign_rhs_code (def1);
1781 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1782 return false;
1783 op1 = gimple_assign_rhs1 (def1);
1784 op2 = gimple_assign_rhs2 (def1);
1786 /* Now look for a similar comparison in the remaining OPS. */
1787 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1789 tree t;
1791 if (TREE_CODE (oe->op) != SSA_NAME)
1792 continue;
1793 def2 = SSA_NAME_DEF_STMT (oe->op);
1794 if (!is_gimple_assign (def2))
1795 continue;
1796 rcode = gimple_assign_rhs_code (def2);
1797 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1798 continue;
1800 /* If we got here, we have a match. See if we can combine the
1801 two comparisons. */
1802 if (opcode == BIT_IOR_EXPR)
1803 t = maybe_fold_or_comparisons (lcode, op1, op2,
1804 rcode, gimple_assign_rhs1 (def2),
1805 gimple_assign_rhs2 (def2));
1806 else
1807 t = maybe_fold_and_comparisons (lcode, op1, op2,
1808 rcode, gimple_assign_rhs1 (def2),
1809 gimple_assign_rhs2 (def2));
1810 if (!t)
1811 continue;
1813 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1814 always give us a boolean_type_node value back. If the original
1815 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1816 we need to convert. */
1817 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1818 t = fold_convert (TREE_TYPE (curr->op), t);
1820 if (TREE_CODE (t) != INTEGER_CST
1821 && !operand_equal_p (t, curr->op, 0))
1823 enum tree_code subcode;
1824 tree newop1, newop2;
1825 if (!COMPARISON_CLASS_P (t))
1826 continue;
1827 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1828 STRIP_USELESS_TYPE_CONVERSION (newop1);
1829 STRIP_USELESS_TYPE_CONVERSION (newop2);
1830 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1831 continue;
1834 if (dump_file && (dump_flags & TDF_DETAILS))
1836 fprintf (dump_file, "Equivalence: ");
1837 print_generic_expr (dump_file, curr->op, 0);
1838 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1839 print_generic_expr (dump_file, oe->op, 0);
1840 fprintf (dump_file, " -> ");
1841 print_generic_expr (dump_file, t, 0);
1842 fprintf (dump_file, "\n");
1845 /* Now we can delete oe, as it has been subsumed by the new combined
1846 expression t. */
1847 ops->ordered_remove (i);
1848 reassociate_stats.ops_eliminated ++;
1850 /* If t is the same as curr->op, we're done. Otherwise we must
1851 replace curr->op with t. Special case is if we got a constant
1852 back, in which case we add it to the end instead of in place of
1853 the current entry. */
1854 if (TREE_CODE (t) == INTEGER_CST)
1856 ops->ordered_remove (currindex);
1857 add_to_ops_vec (ops, t);
1859 else if (!operand_equal_p (t, curr->op, 0))
1861 gimple *sum;
1862 enum tree_code subcode;
1863 tree newop1;
1864 tree newop2;
1865 gcc_assert (COMPARISON_CLASS_P (t));
1866 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1867 STRIP_USELESS_TYPE_CONVERSION (newop1);
1868 STRIP_USELESS_TYPE_CONVERSION (newop2);
1869 gcc_checking_assert (is_gimple_val (newop1)
1870 && is_gimple_val (newop2));
1871 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1872 curr->op = gimple_get_lhs (sum);
1874 return true;
1877 return false;
1881 /* Transform repeated addition of same values into multiply with
1882 constant. */
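/* For example (editorial): for integral types (or floats under
   -funsafe-math-optimizations) an operand list equivalent to
   x + x + x + y is rewritten so that the three occurrences of x are
   replaced by a single entry computed as x * 3, i.e. the chain
   becomes x * 3 + y.  */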
1883 static bool
1884 transform_add_to_multiply (vec<operand_entry *> *ops)
1886 operand_entry *oe;
1887 tree op = NULL_TREE;
1888 int j;
1889 int i, start = -1, end = 0, count = 0;
1890 auto_vec<std::pair <int, int> > indxs;
1891 bool changed = false;
1893 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1894 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1895 || !flag_unsafe_math_optimizations))
1896 return false;
1898 /* Look for repeated operands. */
1899 FOR_EACH_VEC_ELT (*ops, i, oe)
1901 if (start == -1)
1903 count = 1;
1904 op = oe->op;
1905 start = i;
1907 else if (operand_equal_p (oe->op, op, 0))
1909 count++;
1910 end = i;
1912 else
1914 if (count > 1)
1915 indxs.safe_push (std::make_pair (start, end));
1916 count = 1;
1917 op = oe->op;
1918 start = i;
1922 if (count > 1)
1923 indxs.safe_push (std::make_pair (start, end));
1925 for (j = indxs.length () - 1; j >= 0; --j)
1927 /* Convert repeated operand addition to multiplication. */
1928 start = indxs[j].first;
1929 end = indxs[j].second;
1930 op = (*ops)[start]->op;
1931 count = end - start + 1;
1932 for (i = end; i >= start; --i)
1933 ops->unordered_remove (i);
1934 tree tmp = make_ssa_name (TREE_TYPE (op));
1935 tree cst = build_int_cst (integer_type_node, count);
1936 gassign *mul_stmt
1937 = gimple_build_assign (tmp, MULT_EXPR,
1938 op, fold_convert (TREE_TYPE (op), cst));
1939 gimple_set_visited (mul_stmt, true);
1940 add_to_ops_vec (ops, tmp, mul_stmt);
1941 changed = true;
1944 return changed;
1948 /* Perform various identities and other optimizations on the list of
1949 operand entries, stored in OPS. The tree code for the binary
1950 operation between all the operands is OPCODE. */
1952 static void
1953 optimize_ops_list (enum tree_code opcode,
1954 vec<operand_entry *> *ops)
1956 unsigned int length = ops->length ();
1957 unsigned int i;
1958 operand_entry *oe;
1959 operand_entry *oelast = NULL;
1960 bool iterate = false;
1962 if (length == 1)
1963 return;
1965 oelast = ops->last ();
1967 /* If the last two are constants, pop the constants off, merge them
1968 and try the next two. */
1969 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1971 operand_entry *oelm1 = (*ops)[length - 2];
1973 if (oelm1->rank == 0
1974 && is_gimple_min_invariant (oelm1->op)
1975 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1976 TREE_TYPE (oelast->op)))
1978 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1979 oelm1->op, oelast->op);
1981 if (folded && is_gimple_min_invariant (folded))
1983 if (dump_file && (dump_flags & TDF_DETAILS))
1984 fprintf (dump_file, "Merging constants\n");
1986 ops->pop ();
1987 ops->pop ();
1989 add_to_ops_vec (ops, folded);
1990 reassociate_stats.constants_eliminated++;
1992 optimize_ops_list (opcode, ops);
1993 return;
1998 eliminate_using_constants (opcode, ops);
1999 oelast = NULL;
2001 for (i = 0; ops->iterate (i, &oe);)
2003 bool done = false;
2005 if (eliminate_not_pairs (opcode, ops, i, oe))
2006 return;
2007 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2008 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2009 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2011 if (done)
2012 return;
2013 iterate = true;
2014 oelast = NULL;
2015 continue;
2017 oelast = oe;
2018 i++;
2021 length = ops->length ();
2022 oelast = ops->last ();
2024 if (iterate)
2025 optimize_ops_list (opcode, ops);
2028 /* The following functions are subroutines to optimize_range_tests and allow
2029 it to try to change a logical combination of comparisons into a range
2030 test.
2032 For example, both
2033 X == 2 || X == 5 || X == 3 || X == 4
2034 and
2035 X >= 2 && X <= 5
2036 are converted to
2037 (unsigned) (X - 2) <= 3
2039 For more information see comments above fold_range_test in fold-const.c,
2040 this implementation is for GIMPLE. */
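/* Illustrative sketch (assumed example, not from the upstream sources): at
   the source level the first form above corresponds to something like
     int f (int x) { return x == 2 || x == 5 || x == 3 || x == 4; }
   which after the transformation behaves as if it had been written
     int f (int x) { return (unsigned) (x - 2) <= 3; }  */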
2042 struct range_entry
2044 tree exp;
2045 tree low;
2046 tree high;
2047 bool in_p;
2048 bool strict_overflow_p;
2049 unsigned int idx, next;
2052 /* This is similar to make_range in fold-const.c, but on top of
2053 GIMPLE instead of trees. If EXP is non-NULL, it should be
2054 an SSA_NAME and STMT argument is ignored, otherwise STMT
2055 argument should be a GIMPLE_COND. */
2057 static void
2058 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2060 int in_p;
2061 tree low, high;
2062 bool is_bool, strict_overflow_p;
2064 r->exp = NULL_TREE;
2065 r->in_p = false;
2066 r->strict_overflow_p = false;
2067 r->low = NULL_TREE;
2068 r->high = NULL_TREE;
2069 if (exp != NULL_TREE
2070 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2071 return;
2073 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2074 and see if we can refine the range. Some of the cases below may not
2075 happen, but it doesn't seem worth worrying about this. We "continue"
2076 the outer loop when we've changed something; otherwise we "break"
2077 the switch, which will "break" the while. */
2078 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2079 high = low;
2080 in_p = 0;
2081 strict_overflow_p = false;
2082 is_bool = false;
2083 if (exp == NULL_TREE)
2084 is_bool = true;
2085 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2087 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2088 is_bool = true;
2089 else
2090 return;
2092 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2093 is_bool = true;
2095 while (1)
2097 enum tree_code code;
2098 tree arg0, arg1, exp_type;
2099 tree nexp;
2100 location_t loc;
2102 if (exp != NULL_TREE)
2104 if (TREE_CODE (exp) != SSA_NAME
2105 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2106 break;
2108 stmt = SSA_NAME_DEF_STMT (exp);
2109 if (!is_gimple_assign (stmt))
2110 break;
2112 code = gimple_assign_rhs_code (stmt);
2113 arg0 = gimple_assign_rhs1 (stmt);
2114 arg1 = gimple_assign_rhs2 (stmt);
2115 exp_type = TREE_TYPE (exp);
2117 else
2119 code = gimple_cond_code (stmt);
2120 arg0 = gimple_cond_lhs (stmt);
2121 arg1 = gimple_cond_rhs (stmt);
2122 exp_type = boolean_type_node;
2125 if (TREE_CODE (arg0) != SSA_NAME)
2126 break;
2127 loc = gimple_location (stmt);
2128 switch (code)
2130 case BIT_NOT_EXPR:
2131 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2132 /* Ensure the range is either +[-,0], +[0,0],
2133 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2134 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2135 or similar expression of unconditional true or
2136 false, it should not be negated. */
2137 && ((high && integer_zerop (high))
2138 || (low && integer_onep (low))))
2140 in_p = !in_p;
2141 exp = arg0;
2142 continue;
2144 break;
2145 case SSA_NAME:
2146 exp = arg0;
2147 continue;
2148 CASE_CONVERT:
2149 if (is_bool)
2150 goto do_default;
2151 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2153 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2154 is_bool = true;
2155 else
2156 return;
2158 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2159 is_bool = true;
2160 goto do_default;
2161 case EQ_EXPR:
2162 case NE_EXPR:
2163 case LT_EXPR:
2164 case LE_EXPR:
2165 case GE_EXPR:
2166 case GT_EXPR:
2167 is_bool = true;
2168 /* FALLTHRU */
2169 default:
2170 if (!is_bool)
2171 return;
2172 do_default:
2173 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2174 &low, &high, &in_p,
2175 &strict_overflow_p);
2176 if (nexp != NULL_TREE)
2178 exp = nexp;
2179 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2180 continue;
2182 break;
2184 break;
2186 if (is_bool)
2188 r->exp = exp;
2189 r->in_p = in_p;
2190 r->low = low;
2191 r->high = high;
2192 r->strict_overflow_p = strict_overflow_p;
2196 /* Comparison function for qsort. Sort entries
2197 without SSA_NAME exp first, then with SSA_NAMEs sorted
2198 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2199 by increasing ->low and if ->low is the same, by increasing
2200 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2201 maximum. */
2203 static int
2204 range_entry_cmp (const void *a, const void *b)
2206 const struct range_entry *p = (const struct range_entry *) a;
2207 const struct range_entry *q = (const struct range_entry *) b;
2209 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2211 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2213 /* Group range_entries for the same SSA_NAME together. */
2214 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2215 return -1;
2216 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2217 return 1;
2218 /* If ->low is different, NULL low goes first, then by
2219 ascending low. */
2220 if (p->low != NULL_TREE)
2222 if (q->low != NULL_TREE)
2224 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2225 p->low, q->low);
2226 if (tem && integer_onep (tem))
2227 return -1;
2228 tem = fold_binary (GT_EXPR, boolean_type_node,
2229 p->low, q->low);
2230 if (tem && integer_onep (tem))
2231 return 1;
2233 else
2234 return 1;
2236 else if (q->low != NULL_TREE)
2237 return -1;
2238 /* If ->high is different, NULL high goes last, before that by
2239 ascending high. */
2240 if (p->high != NULL_TREE)
2242 if (q->high != NULL_TREE)
2244 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2245 p->high, q->high);
2246 if (tem && integer_onep (tem))
2247 return -1;
2248 tem = fold_binary (GT_EXPR, boolean_type_node,
2249 p->high, q->high);
2250 if (tem && integer_onep (tem))
2251 return 1;
2253 else
2254 return -1;
2256 else if (q->high != NULL_TREE)
2257 return 1;
2258 /* If both ranges are the same, sort below by ascending idx. */
2260 else
2261 return 1;
2263 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2264 return -1;
2266 if (p->idx < q->idx)
2267 return -1;
2268 else
2270 gcc_checking_assert (p->idx > q->idx);
2271 return 1;
2275 /* Helper routine of optimize_range_tests.
2276 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2277 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2278 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2279 is NULL, OTHERRANGEP should not be NULL and then OTHERRANGEP points to
2280 an array of COUNT pointers to other ranges. Return
2281 true if the range merge has been successful.
2282 If OPCODE is ERROR_MARK, this is called from within
2283 maybe_optimize_range_tests and is performing inter-bb range optimization.
2284 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2285 oe->rank. */
2287 static bool
2288 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2289 struct range_entry **otherrangep,
2290 unsigned int count, enum tree_code opcode,
2291 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2292 bool in_p, tree low, tree high, bool strict_overflow_p)
2294 operand_entry *oe = (*ops)[range->idx];
2295 tree op = oe->op;
2296 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2297 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2298 location_t loc = gimple_location (stmt);
2299 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2300 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2301 in_p, low, high);
2302 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2303 gimple_stmt_iterator gsi;
2304 unsigned int i, uid;
2306 if (tem == NULL_TREE)
2307 return false;
2309 /* If op is default def SSA_NAME, there is no place to insert the
2310 new comparison. Give up, unless we can use OP itself as the
2311 range test. */
2312 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2314 if (op == range->exp
2315 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2316 || TREE_CODE (optype) == BOOLEAN_TYPE)
2317 && (op == tem
2318 || (TREE_CODE (tem) == EQ_EXPR
2319 && TREE_OPERAND (tem, 0) == op
2320 && integer_onep (TREE_OPERAND (tem, 1))))
2321 && opcode != BIT_IOR_EXPR
2322 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2324 stmt = NULL;
2325 tem = op;
2327 else
2328 return false;
2331 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2332 warning_at (loc, OPT_Wstrict_overflow,
2333 "assuming signed overflow does not occur "
2334 "when simplifying range test");
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2338 struct range_entry *r;
2339 fprintf (dump_file, "Optimizing range tests ");
2340 print_generic_expr (dump_file, range->exp, 0);
2341 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2342 print_generic_expr (dump_file, range->low, 0);
2343 fprintf (dump_file, ", ");
2344 print_generic_expr (dump_file, range->high, 0);
2345 fprintf (dump_file, "]");
2346 for (i = 0; i < count; i++)
2348 if (otherrange)
2349 r = otherrange + i;
2350 else
2351 r = otherrangep[i];
2352 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2353 print_generic_expr (dump_file, r->low, 0);
2354 fprintf (dump_file, ", ");
2355 print_generic_expr (dump_file, r->high, 0);
2356 fprintf (dump_file, "]");
2358 fprintf (dump_file, "\n into ");
2359 print_generic_expr (dump_file, tem, 0);
2360 fprintf (dump_file, "\n");
2363 if (opcode == BIT_IOR_EXPR
2364 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2365 tem = invert_truthvalue_loc (loc, tem);
2367 tem = fold_convert_loc (loc, optype, tem);
2368 if (stmt)
2370 gsi = gsi_for_stmt (stmt);
2371 uid = gimple_uid (stmt);
2373 else
2375 gsi = gsi_none ();
2376 uid = 0;
2378 if (stmt == NULL)
2379 gcc_checking_assert (tem == op);
2380 /* In rare cases range->exp can be equal to lhs of stmt.
2381 In that case we have to insert after the stmt rather than before
2382 it. If stmt is a PHI, insert it at the start of the basic block. */
2383 else if (op != range->exp)
2385 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2386 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2387 GSI_SAME_STMT);
2388 gsi_prev (&gsi);
2390 else if (gimple_code (stmt) != GIMPLE_PHI)
2392 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2393 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2394 GSI_CONTINUE_LINKING);
2396 else
2398 gsi = gsi_after_labels (gimple_bb (stmt));
2399 if (!gsi_end_p (gsi))
2400 uid = gimple_uid (gsi_stmt (gsi));
2401 else
2403 gsi = gsi_start_bb (gimple_bb (stmt));
2404 uid = 1;
2405 while (!gsi_end_p (gsi))
2407 uid = gimple_uid (gsi_stmt (gsi));
2408 gsi_next (&gsi);
2411 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2412 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2413 GSI_SAME_STMT);
2414 if (gsi_end_p (gsi))
2415 gsi = gsi_last_bb (gimple_bb (stmt));
2416 else
2417 gsi_prev (&gsi);
2419 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2420 if (gimple_uid (gsi_stmt (gsi)))
2421 break;
2422 else
2423 gimple_set_uid (gsi_stmt (gsi), uid);
2425 oe->op = tem;
2426 range->exp = exp;
2427 range->low = low;
2428 range->high = high;
2429 range->in_p = in_p;
2430 range->strict_overflow_p = false;
2432 for (i = 0; i < count; i++)
2434 if (otherrange)
2435 range = otherrange + i;
2436 else
2437 range = otherrangep[i];
2438 oe = (*ops)[range->idx];
2439 /* Now change all the other range test immediate uses, so that
2440 those tests will be optimized away. */
2441 if (opcode == ERROR_MARK)
2443 if (oe->op)
2444 oe->op = build_int_cst (TREE_TYPE (oe->op),
2445 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2446 else
2447 oe->op = (oe->rank == BIT_IOR_EXPR
2448 ? boolean_false_node : boolean_true_node);
2450 else
2451 oe->op = error_mark_node;
2452 range->exp = NULL_TREE;
2453 range->low = NULL_TREE;
2454 range->high = NULL_TREE;
2456 return true;
2459 /* Optimize X == CST1 || X == CST2
2460 if popcount (CST1 ^ CST2) == 1 into
2461 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2462 Similarly for ranges. E.g.
2463 X != 2 && X != 3 && X != 10 && X != 11
2464 will be transformed by the previous optimization into
2465 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2466 and this loop can transform that into
2467 !(((X & ~8) - 2U) <= 1U). */
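/* Concrete example (added illustration, not from the upstream sources):
   for X == 4 || X == 6 we have CST1 ^ CST2 == 2 with popcount 1, so the
   whole test becomes the single comparison (X & ~2) == 4.  */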
2469 static bool
2470 optimize_range_tests_xor (enum tree_code opcode, tree type,
2471 tree lowi, tree lowj, tree highi, tree highj,
2472 vec<operand_entry *> *ops,
2473 struct range_entry *rangei,
2474 struct range_entry *rangej)
2476 tree lowxor, highxor, tem, exp;
2477 /* Check lowi ^ lowj == highi ^ highj and
2478 popcount (lowi ^ lowj) == 1. */
2479 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2480 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2481 return false;
2482 if (!integer_pow2p (lowxor))
2483 return false;
2484 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2485 if (!tree_int_cst_equal (lowxor, highxor))
2486 return false;
2488 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2489 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2490 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2491 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2492 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2493 NULL, rangei->in_p, lowj, highj,
2494 rangei->strict_overflow_p
2495 || rangej->strict_overflow_p))
2496 return true;
2497 return false;
2500 /* Optimize X == CST1 || X == CST2
2501 if popcount (CST2 - CST1) == 1 into
2502 ((X - CST1) & ~(CST2 - CST1)) == 0.
2503 Similarly for ranges. E.g.
2504 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2505 || X == 75 || X == 45
2506 will be transformed by the previous optimization into
2507 (X - 43U) <= 3U || (X - 75U) <= 3U
2508 and this loop can transform that into
2509 ((X - 43U) & ~(75U - 43U)) <= 3U. */
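/* Concrete example (added illustration, not from the upstream sources):
   for X == 43 || X == 75 we have CST2 - CST1 == 32 with popcount 1, so the
   whole test becomes the single comparison ((X - 43U) & ~32U) == 0.  */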
2510 static bool
2511 optimize_range_tests_diff (enum tree_code opcode, tree type,
2512 tree lowi, tree lowj, tree highi, tree highj,
2513 vec<operand_entry *> *ops,
2514 struct range_entry *rangei,
2515 struct range_entry *rangej)
2517 tree tem1, tem2, mask;
2518 /* Check highi - lowi == highj - lowj. */
2519 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2520 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2521 return false;
2522 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2523 if (!tree_int_cst_equal (tem1, tem2))
2524 return false;
2525 /* Check popcount (lowj - lowi) == 1. */
2526 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2527 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2528 return false;
2529 if (!integer_pow2p (tem1))
2530 return false;
2532 type = unsigned_type_for (type);
2533 tem1 = fold_convert (type, tem1);
2534 tem2 = fold_convert (type, tem2);
2535 lowi = fold_convert (type, lowi);
2536 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2537 tem1 = fold_binary (MINUS_EXPR, type,
2538 fold_convert (type, rangei->exp), lowi);
2539 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2540 lowj = build_int_cst (type, 0);
2541 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2542 NULL, rangei->in_p, lowj, tem2,
2543 rangei->strict_overflow_p
2544 || rangej->strict_overflow_p))
2545 return true;
2546 return false;
2549 /* Do the checks common to optimize_range_tests_xor and
2550 optimize_range_tests_diff.
2551 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2552 Else it calls optimize_range_tests_diff. */
2554 static bool
2555 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2556 bool optimize_xor, vec<operand_entry *> *ops,
2557 struct range_entry *ranges)
2559 int i, j;
2560 bool any_changes = false;
2561 for (i = first; i < length; i++)
2563 tree lowi, highi, lowj, highj, type, tem;
2565 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2566 continue;
2567 type = TREE_TYPE (ranges[i].exp);
2568 if (!INTEGRAL_TYPE_P (type))
2569 continue;
2570 lowi = ranges[i].low;
2571 if (lowi == NULL_TREE)
2572 lowi = TYPE_MIN_VALUE (type);
2573 highi = ranges[i].high;
2574 if (highi == NULL_TREE)
2575 continue;
2576 for (j = i + 1; j < length && j < i + 64; j++)
2578 bool changes;
2579 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2580 continue;
2581 lowj = ranges[j].low;
2582 if (lowj == NULL_TREE)
2583 continue;
2584 highj = ranges[j].high;
2585 if (highj == NULL_TREE)
2586 highj = TYPE_MAX_VALUE (type);
2587 /* Check lowj > highi. */
2588 tem = fold_binary (GT_EXPR, boolean_type_node,
2589 lowj, highi);
2590 if (tem == NULL_TREE || !integer_onep (tem))
2591 continue;
2592 if (optimize_xor)
2593 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2594 highi, highj, ops,
2595 ranges + i, ranges + j);
2596 else
2597 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2598 highi, highj, ops,
2599 ranges + i, ranges + j);
2600 if (changes)
2602 any_changes = true;
2603 break;
2607 return any_changes;
2610 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2611 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2612 EXP on success, NULL otherwise. */
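/* Illustrative sketch (assumed example, not part of the upstream sources):
   for LOW 3, HIGH 5 and TOTALLOW 0, with TOTALLOWP NULL, the computed mask
   has bits 3 through 5 set, i.e. 0x38.  */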
2614 static tree
2615 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2616 wide_int *mask, tree *totallowp)
2618 tree tem = int_const_binop (MINUS_EXPR, high, low);
2619 if (tem == NULL_TREE
2620 || TREE_CODE (tem) != INTEGER_CST
2621 || TREE_OVERFLOW (tem)
2622 || tree_int_cst_sgn (tem) == -1
2623 || compare_tree_int (tem, prec) != -1)
2624 return NULL_TREE;
2626 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2627 *mask = wi::shifted_mask (0, max, false, prec);
2628 if (TREE_CODE (exp) == BIT_AND_EXPR
2629 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2631 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2632 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2633 if (wi::popcount (msk) == 1
2634 && wi::ltu_p (msk, prec - max))
2636 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2637 max += msk.to_uhwi ();
2638 exp = TREE_OPERAND (exp, 0);
2639 if (integer_zerop (low)
2640 && TREE_CODE (exp) == PLUS_EXPR
2641 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2643 tree ret = TREE_OPERAND (exp, 0);
2644 STRIP_NOPS (ret);
2645 widest_int bias
2646 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2647 TYPE_PRECISION (TREE_TYPE (low))));
2648 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2649 if (totallowp)
2651 *totallowp = tbias;
2652 return ret;
2654 else if (!tree_int_cst_lt (totallow, tbias))
2655 return NULL_TREE;
2656 bias = wi::to_widest (tbias);
2657 bias -= wi::to_widest (totallow);
2658 if (bias >= 0 && bias < prec - max)
2660 *mask = wi::lshift (*mask, bias);
2661 return ret;
2666 if (totallowp)
2667 return exp;
2668 if (!tree_int_cst_lt (totallow, low))
2669 return exp;
2670 tem = int_const_binop (MINUS_EXPR, low, totallow);
2671 if (tem == NULL_TREE
2672 || TREE_CODE (tem) != INTEGER_CST
2673 || TREE_OVERFLOW (tem)
2674 || compare_tree_int (tem, prec - max) == 1)
2675 return NULL_TREE;
2677 *mask = wi::lshift (*mask, wi::to_widest (tem));
2678 return exp;
2681 /* Attempt to optimize small range tests using bit test.
2682 E.g.
2683 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2684 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2685 has been optimized by earlier optimizations into:
2686 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2687 As the whole 43 through 82 range spans fewer than 64 values,
2688 for 64-bit word targets optimize that into:
2689 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2691 static bool
2692 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2693 vec<operand_entry *> *ops,
2694 struct range_entry *ranges)
2696 int i, j;
2697 bool any_changes = false;
2698 int prec = GET_MODE_BITSIZE (word_mode);
2699 auto_vec<struct range_entry *, 64> candidates;
2701 for (i = first; i < length - 2; i++)
2703 tree lowi, highi, lowj, highj, type;
2705 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2706 continue;
2707 type = TREE_TYPE (ranges[i].exp);
2708 if (!INTEGRAL_TYPE_P (type))
2709 continue;
2710 lowi = ranges[i].low;
2711 if (lowi == NULL_TREE)
2712 lowi = TYPE_MIN_VALUE (type);
2713 highi = ranges[i].high;
2714 if (highi == NULL_TREE)
2715 continue;
2716 wide_int mask;
2717 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2718 highi, &mask, &lowi);
2719 if (exp == NULL_TREE)
2720 continue;
2721 bool strict_overflow_p = ranges[i].strict_overflow_p;
2722 candidates.truncate (0);
2723 int end = MIN (i + 64, length);
2724 for (j = i + 1; j < end; j++)
2726 tree exp2;
2727 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2728 continue;
2729 if (ranges[j].exp == exp)
2731 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2733 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2734 if (exp2 == exp)
2736 else if (TREE_CODE (exp2) == PLUS_EXPR)
2738 exp2 = TREE_OPERAND (exp2, 0);
2739 STRIP_NOPS (exp2);
2740 if (exp2 != exp)
2741 continue;
2743 else
2744 continue;
2746 else
2747 continue;
2748 lowj = ranges[j].low;
2749 if (lowj == NULL_TREE)
2750 continue;
2751 highj = ranges[j].high;
2752 if (highj == NULL_TREE)
2753 highj = TYPE_MAX_VALUE (type);
2754 wide_int mask2;
2755 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2756 highj, &mask2, NULL);
2757 if (exp2 != exp)
2758 continue;
2759 mask |= mask2;
2760 strict_overflow_p |= ranges[j].strict_overflow_p;
2761 candidates.safe_push (&ranges[j]);
2764 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2765 if (candidates.length () >= 2)
2767 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2768 wi::to_widest (lowi)
2769 + prec - 1 - wi::clz (mask));
2770 operand_entry *oe = (*ops)[ranges[i].idx];
2771 tree op = oe->op;
2772 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2773 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2774 location_t loc = gimple_location (stmt);
2775 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2777 /* See if it isn't cheaper to pretend the minimum value of the
2778 range is 0, if the maximum value is small enough.
2779 We can then avoid the subtraction of the minimum value, but the
2780 mask constant could perhaps be more expensive. */
2781 if (compare_tree_int (lowi, 0) > 0
2782 && compare_tree_int (high, prec) < 0)
2784 int cost_diff;
2785 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2786 rtx reg = gen_raw_REG (word_mode, 10000);
2787 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2788 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2789 GEN_INT (-m)), speed_p);
2790 rtx r = immed_wide_int_const (mask, word_mode);
2791 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2792 word_mode, speed_p);
2793 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2794 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2795 word_mode, speed_p);
2796 if (cost_diff > 0)
2798 mask = wi::lshift (mask, m);
2799 lowi = build_zero_cst (TREE_TYPE (lowi));
2803 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2804 false, lowi, high);
2805 if (tem == NULL_TREE || is_gimple_val (tem))
2806 continue;
2807 tree etype = unsigned_type_for (TREE_TYPE (exp));
2808 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2809 fold_convert_loc (loc, etype, exp),
2810 fold_convert_loc (loc, etype, lowi));
2811 exp = fold_convert_loc (loc, integer_type_node, exp);
2812 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2813 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2814 build_int_cst (word_type, 1), exp);
2815 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2816 wide_int_to_tree (word_type, mask));
2817 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2818 build_zero_cst (word_type));
2819 if (is_gimple_val (exp))
2820 continue;
2822 /* The shift might have undefined behavior if TEM is true,
2823 but reassociate_bb isn't prepared to have basic blocks
2824 split when it is running. So, temporarily emit code
2825 with BIT_IOR_EXPR instead of &&, and fix it up in
2826 branch_fixup. */
2827 gimple_seq seq;
2828 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2829 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2830 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2831 gimple_seq seq2;
2832 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2833 gimple_seq_add_seq_without_update (&seq, seq2);
2834 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2835 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2836 gimple *g = gimple_build_assign (make_ssa_name (optype),
2837 BIT_IOR_EXPR, tem, exp);
2838 gimple_set_location (g, loc);
2839 gimple_seq_add_stmt_without_update (&seq, g);
2840 exp = gimple_assign_lhs (g);
2841 tree val = build_zero_cst (optype);
2842 if (update_range_test (&ranges[i], NULL, candidates.address (),
2843 candidates.length (), opcode, ops, exp,
2844 seq, false, val, val, strict_overflow_p))
2846 any_changes = true;
2847 reassoc_branch_fixups.safe_push (tem);
2849 else
2850 gimple_seq_discard (seq);
2853 return any_changes;
2856 /* Attempt to optimize for signed a and b where b is known to be >= 0:
2857 a >= 0 && a < b into (unsigned) a < (unsigned) b
2858 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
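/* Illustrative sketch (assumed example, not part of the upstream sources):
   for signed int a and b, with b known to be non-negative,
     if (a >= 0 && a < b) ...
   can be rewritten as
     if ((unsigned) a < (unsigned) b) ...
   because a negative a wraps to a large unsigned value and still fails the
   comparison.  */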
2860 static bool
2861 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
2862 vec<operand_entry *> *ops,
2863 struct range_entry *ranges)
2865 int i;
2866 bool any_changes = false;
2867 hash_map<tree, int> *map = NULL;
2869 for (i = first; i < length; i++)
2871 if (ranges[i].exp == NULL_TREE
2872 || TREE_CODE (ranges[i].exp) != SSA_NAME
2873 || !ranges[i].in_p)
2874 continue;
2876 tree type = TREE_TYPE (ranges[i].exp);
2877 if (!INTEGRAL_TYPE_P (type)
2878 || TYPE_UNSIGNED (type)
2879 || ranges[i].low == NULL_TREE
2880 || !integer_zerop (ranges[i].low)
2881 || ranges[i].high != NULL_TREE)
2882 continue;
2883 /* EXP >= 0 here. */
2884 if (map == NULL)
2885 map = new hash_map <tree, int>;
2886 map->put (ranges[i].exp, i);
2889 if (map == NULL)
2890 return false;
2892 for (i = 0; i < length; i++)
2894 if (ranges[i].low == NULL_TREE
2895 || ranges[i].high == NULL_TREE
2896 || !integer_zerop (ranges[i].low)
2897 || !integer_zerop (ranges[i].high))
2898 continue;
2900 gimple *stmt;
2901 tree_code ccode;
2902 tree rhs1, rhs2;
2903 if (ranges[i].exp)
2905 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
2906 continue;
2907 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
2908 if (!is_gimple_assign (stmt))
2909 continue;
2910 ccode = gimple_assign_rhs_code (stmt);
2911 rhs1 = gimple_assign_rhs1 (stmt);
2912 rhs2 = gimple_assign_rhs2 (stmt);
2914 else
2916 operand_entry *oe = (*ops)[ranges[i].idx];
2917 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2918 if (gimple_code (stmt) != GIMPLE_COND)
2919 continue;
2920 ccode = gimple_cond_code (stmt);
2921 rhs1 = gimple_cond_lhs (stmt);
2922 rhs2 = gimple_cond_rhs (stmt);
2925 if (TREE_CODE (rhs1) != SSA_NAME
2926 || rhs2 == NULL_TREE
2927 || TREE_CODE (rhs2) != SSA_NAME)
2928 continue;
2930 switch (ccode)
2932 case GT_EXPR:
2933 case GE_EXPR:
2934 if (!ranges[i].in_p)
2935 std::swap (rhs1, rhs2);
2936 ccode = swap_tree_comparison (ccode);
2937 break;
2938 case LT_EXPR:
2939 case LE_EXPR:
2940 if (ranges[i].in_p)
2941 std::swap (rhs1, rhs2);
2942 break;
2943 default:
2944 continue;
2947 int *idx = map->get (rhs1);
2948 if (idx == NULL)
2949 continue;
2951 wide_int nz = get_nonzero_bits (rhs2);
2952 if (wi::neg_p (nz))
2953 continue;
2955 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
2956 and RHS2 is known to be >= 0. */
2957 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
2959 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2960 if ((ranges[*idx].strict_overflow_p
2961 || ranges[i].strict_overflow_p)
2962 && issue_strict_overflow_warning (wc))
2963 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
2964 "assuming signed overflow does not occur "
2965 "when simplifying range test");
2967 if (dump_file && (dump_flags & TDF_DETAILS))
2969 struct range_entry *r = &ranges[*idx];
2970 fprintf (dump_file, "Optimizing range test ");
2971 print_generic_expr (dump_file, r->exp, 0);
2972 fprintf (dump_file, " +[");
2973 print_generic_expr (dump_file, r->low, 0);
2974 fprintf (dump_file, ", ");
2975 print_generic_expr (dump_file, r->high, 0);
2976 fprintf (dump_file, "] and comparison ");
2977 print_generic_expr (dump_file, rhs1, 0);
2978 fprintf (dump_file, " %s ", op_symbol_code (ccode));
2979 print_generic_expr (dump_file, rhs2, 0);
2980 fprintf (dump_file, "\n into (");
2981 print_generic_expr (dump_file, utype, 0);
2982 fprintf (dump_file, ") ");
2983 print_generic_expr (dump_file, rhs1, 0);
2984 fprintf (dump_file, " %s (", op_symbol_code (ccode));
2985 print_generic_expr (dump_file, utype, 0);
2986 fprintf (dump_file, ") ");
2987 print_generic_expr (dump_file, rhs2, 0);
2988 fprintf (dump_file, "\n");
2991 if (ranges[i].in_p)
2992 std::swap (rhs1, rhs2);
2994 unsigned int uid = gimple_uid (stmt);
2995 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2996 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
2997 gimple_set_uid (g, uid);
2998 rhs1 = gimple_assign_lhs (g);
2999 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3000 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3001 gimple_set_uid (g, uid);
3002 rhs2 = gimple_assign_lhs (g);
3003 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3004 if (tree_swap_operands_p (rhs1, rhs2))
3006 std::swap (rhs1, rhs2);
3007 ccode = swap_tree_comparison (ccode);
3009 if (gimple_code (stmt) == GIMPLE_COND)
3011 gcond *c = as_a <gcond *> (stmt);
3012 gimple_cond_set_code (c, ccode);
3013 gimple_cond_set_lhs (c, rhs1);
3014 gimple_cond_set_rhs (c, rhs2);
3015 update_stmt (stmt);
3017 else
3019 operand_entry *oe = (*ops)[ranges[i].idx];
3020 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3021 if (!INTEGRAL_TYPE_P (ctype)
3022 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3023 && TYPE_PRECISION (ctype) != 1))
3024 ctype = boolean_type_node;
3025 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3026 gimple_set_uid (g, uid);
3027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3028 if (oe->op && ctype != TREE_TYPE (oe->op))
3030 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3031 NOP_EXPR, gimple_assign_lhs (g));
3032 gimple_set_uid (g, uid);
3033 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3035 ranges[i].exp = gimple_assign_lhs (g);
3036 oe->op = ranges[i].exp;
3037 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3038 ranges[i].high = ranges[i].low;
3040 ranges[i].strict_overflow_p = false;
3041 operand_entry *oe = (*ops)[ranges[*idx].idx];
3042 /* Now change all the other range test immediate uses, so that
3043 those tests will be optimized away. */
3044 if (opcode == ERROR_MARK)
3046 if (oe->op)
3047 oe->op = build_int_cst (TREE_TYPE (oe->op),
3048 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3049 else
3050 oe->op = (oe->rank == BIT_IOR_EXPR
3051 ? boolean_false_node : boolean_true_node);
3053 else
3054 oe->op = error_mark_node;
3055 ranges[*idx].exp = NULL_TREE;
3056 ranges[*idx].low = NULL_TREE;
3057 ranges[*idx].high = NULL_TREE;
3058 any_changes = true;
3061 delete map;
3062 return any_changes;
3065 /* Optimize range tests, similarly to how fold_range_test optimizes
3066 them on trees. The tree code for the binary
3067 operation between all the operands is OPCODE.
3068 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3069 maybe_optimize_range_tests for inter-bb range optimization.
3070 In that case if oe->op is NULL, oe->id is bb->index whose
3071 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3072 the actual opcode. */
3074 static bool
3075 optimize_range_tests (enum tree_code opcode,
3076 vec<operand_entry *> *ops)
3078 unsigned int length = ops->length (), i, j, first;
3079 operand_entry *oe;
3080 struct range_entry *ranges;
3081 bool any_changes = false;
3083 if (length == 1)
3084 return false;
3086 ranges = XNEWVEC (struct range_entry, length);
3087 for (i = 0; i < length; i++)
3089 oe = (*ops)[i];
3090 ranges[i].idx = i;
3091 init_range_entry (ranges + i, oe->op,
3092 oe->op
3093 ? NULL
3094 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3095 /* For | invert it now, we will invert it again before emitting
3096 the optimized expression. */
3097 if (opcode == BIT_IOR_EXPR
3098 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3099 ranges[i].in_p = !ranges[i].in_p;
3102 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3103 for (i = 0; i < length; i++)
3104 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3105 break;
3107 /* Try to merge ranges. */
3108 for (first = i; i < length; i++)
3110 tree low = ranges[i].low;
3111 tree high = ranges[i].high;
3112 int in_p = ranges[i].in_p;
3113 bool strict_overflow_p = ranges[i].strict_overflow_p;
3114 int update_fail_count = 0;
3116 for (j = i + 1; j < length; j++)
3118 if (ranges[i].exp != ranges[j].exp)
3119 break;
3120 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3121 ranges[j].in_p, ranges[j].low, ranges[j].high))
3122 break;
3123 strict_overflow_p |= ranges[j].strict_overflow_p;
3126 if (j == i + 1)
3127 continue;
3129 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3130 opcode, ops, ranges[i].exp, NULL, in_p,
3131 low, high, strict_overflow_p))
3133 i = j - 1;
3134 any_changes = true;
3136 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3137 while update_range_test would fail. */
3138 else if (update_fail_count == 64)
3139 i = j - 1;
3140 else
3141 ++update_fail_count;
3144 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3145 ops, ranges);
3147 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3148 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3149 ops, ranges);
3150 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3151 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3152 ops, ranges);
3153 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3154 ranges);
3156 if (any_changes && opcode != ERROR_MARK)
3158 j = 0;
3159 FOR_EACH_VEC_ELT (*ops, i, oe)
3161 if (oe->op == error_mark_node)
3162 continue;
3163 else if (i != j)
3164 (*ops)[j] = oe;
3165 j++;
3167 ops->truncate (j);
3170 XDELETEVEC (ranges);
3171 return any_changes;
3174 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3175 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3176 otherwise the comparison code. */
3178 static tree_code
3179 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3181 if (TREE_CODE (var) != SSA_NAME)
3182 return ERROR_MARK;
3184 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3185 if (stmt == NULL)
3186 return ERROR_MARK;
3188 /* ??? If we start creating more COND_EXPR, we could perform
3189 this same optimization with them. For now, simplify. */
3190 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3191 return ERROR_MARK;
3193 tree cond = gimple_assign_rhs1 (stmt);
3194 tree_code cmp = TREE_CODE (cond);
3195 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3196 return ERROR_MARK;
3198 /* ??? For now, allow only canonical true and false result vectors.
3199 We could expand this to other constants should the need arise,
3200 but at the moment we don't create them. */
3201 tree t = gimple_assign_rhs2 (stmt);
3202 tree f = gimple_assign_rhs3 (stmt);
3203 bool inv;
3204 if (integer_all_onesp (t))
3205 inv = false;
3206 else if (integer_all_onesp (f))
3208 cmp = invert_tree_comparison (cmp, false);
3209 inv = true;
3211 else
3212 return ERROR_MARK;
3213 if (!integer_zerop (f))
3214 return ERROR_MARK;
3216 /* Success! */
3217 if (rets)
3218 *rets = stmt;
3219 if (reti)
3220 *reti = inv;
3221 return cmp;
3224 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3225 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
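/* Illustrative sketch (assumed example, not from the upstream sources):
   x_1 = VEC_COND_EXPR <a < b, { -1, ... }, { 0, ... }>;
   x_2 = VEC_COND_EXPR <a <= b, { -1, ... }, { 0, ... }>;
   x_3 = x_1 & x_2;
   Here maybe_fold_and_comparisons can combine the two conditions into the
   single condition a < b, so x_1's condition is rewritten and x_2 drops
   out of the operand list.  */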
3227 static bool
3228 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3230 unsigned int length = ops->length (), i, j;
3231 bool any_changes = false;
3233 if (length == 1)
3234 return false;
3236 for (i = 0; i < length; ++i)
3238 tree elt0 = (*ops)[i]->op;
3240 gassign *stmt0;
3241 bool invert;
3242 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3243 if (cmp0 == ERROR_MARK)
3244 continue;
3246 for (j = i + 1; j < length; ++j)
3248 tree &elt1 = (*ops)[j]->op;
3250 gassign *stmt1;
3251 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3252 if (cmp1 == ERROR_MARK)
3253 continue;
3255 tree cond0 = gimple_assign_rhs1 (stmt0);
3256 tree x0 = TREE_OPERAND (cond0, 0);
3257 tree y0 = TREE_OPERAND (cond0, 1);
3259 tree cond1 = gimple_assign_rhs1 (stmt1);
3260 tree x1 = TREE_OPERAND (cond1, 0);
3261 tree y1 = TREE_OPERAND (cond1, 1);
3263 tree comb;
3264 if (opcode == BIT_AND_EXPR)
3265 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3266 else if (opcode == BIT_IOR_EXPR)
3267 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3268 else
3269 gcc_unreachable ();
3270 if (comb == NULL)
3271 continue;
3273 /* Success! */
3274 if (dump_file && (dump_flags & TDF_DETAILS))
3276 fprintf (dump_file, "Transforming ");
3277 print_generic_expr (dump_file, cond0, 0);
3278 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3279 print_generic_expr (dump_file, cond1, 0);
3280 fprintf (dump_file, " into ");
3281 print_generic_expr (dump_file, comb, 0);
3282 fputc ('\n', dump_file);
3285 gimple_assign_set_rhs1 (stmt0, comb);
3286 if (invert)
3287 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3288 *gimple_assign_rhs3_ptr (stmt0));
3289 update_stmt (stmt0);
3291 elt1 = error_mark_node;
3292 any_changes = true;
3296 if (any_changes)
3298 operand_entry *oe;
3299 j = 0;
3300 FOR_EACH_VEC_ELT (*ops, i, oe)
3302 if (oe->op == error_mark_node)
3303 continue;
3304 else if (i != j)
3305 (*ops)[j] = oe;
3306 j++;
3308 ops->truncate (j);
3311 return any_changes;
3314 /* Return true if STMT is a cast like:
3315 <bb N>:
3317 _123 = (int) _234;
3319 <bb M>:
3320 # _345 = PHI <_123(N), 1(...), 1(...)>
3321 where _234 has bool type, _123 has a single use and
3322 bb N has a single successor M. This is commonly used in
3323 the last block of a range test.
3325 Also return true if STMT is a tcc_comparison like:
3326 <bb N>:
3328 _234 = a_2(D) == 2;
3330 <bb M>:
3331 # _345 = PHI <_234(N), 1(...), 1(...)>
3332 _346 = (int) _345;
3333 where _234 has bool type, a single use and
3334 bb N has a single successor M. This is commonly used in
3335 the last block of a range test. */
3337 static bool
3338 final_range_test_p (gimple *stmt)
3340 basic_block bb, rhs_bb, lhs_bb;
3341 edge e;
3342 tree lhs, rhs;
3343 use_operand_p use_p;
3344 gimple *use_stmt;
3346 if (!gimple_assign_cast_p (stmt)
3347 && (!is_gimple_assign (stmt)
3348 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3349 != tcc_comparison)))
3350 return false;
3351 bb = gimple_bb (stmt);
3352 if (!single_succ_p (bb))
3353 return false;
3354 e = single_succ_edge (bb);
3355 if (e->flags & EDGE_COMPLEX)
3356 return false;
3358 lhs = gimple_assign_lhs (stmt);
3359 rhs = gimple_assign_rhs1 (stmt);
3360 if (gimple_assign_cast_p (stmt)
3361 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3362 || TREE_CODE (rhs) != SSA_NAME
3363 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3364 return false;
3366 if (!gimple_assign_cast_p (stmt)
3367 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3368 return false;
3370 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3371 if (!single_imm_use (lhs, &use_p, &use_stmt))
3372 return false;
3374 if (gimple_code (use_stmt) != GIMPLE_PHI
3375 || gimple_bb (use_stmt) != e->dest)
3376 return false;
3378 /* And that the rhs is defined in the same loop. */
3379 if (gimple_assign_cast_p (stmt))
3381 if (TREE_CODE (rhs) != SSA_NAME
3382 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3383 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3384 return false;
3386 else
3388 if (TREE_CODE (lhs) != SSA_NAME
3389 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3390 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3391 return false;
3394 return true;
3397 /* Return true if BB is suitable basic block for inter-bb range test
3398 optimization. If BACKWARD is true, BB should be the only predecessor
3399 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3400 or compared with to find a common basic block to which all conditions
3401 branch if true resp. false. If BACKWARD is false, TEST_BB should
3402 be the only predecessor of BB. */
3404 static bool
3405 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3406 bool backward)
3408 edge_iterator ei, ei2;
3409 edge e, e2;
3410 gimple *stmt;
3411 gphi_iterator gsi;
3412 bool other_edge_seen = false;
3413 bool is_cond;
3415 if (test_bb == bb)
3416 return false;
3417 /* Check last stmt first. */
3418 stmt = last_stmt (bb);
3419 if (stmt == NULL
3420 || (gimple_code (stmt) != GIMPLE_COND
3421 && (backward || !final_range_test_p (stmt)))
3422 || gimple_visited_p (stmt)
3423 || stmt_could_throw_p (stmt)
3424 || *other_bb == bb)
3425 return false;
3426 is_cond = gimple_code (stmt) == GIMPLE_COND;
3427 if (is_cond)
3429 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3430 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3431 to *OTHER_BB (if not set yet, try to find it out). */
3432 if (EDGE_COUNT (bb->succs) != 2)
3433 return false;
3434 FOR_EACH_EDGE (e, ei, bb->succs)
3436 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3437 return false;
3438 if (e->dest == test_bb)
3440 if (backward)
3441 continue;
3442 else
3443 return false;
3445 if (e->dest == bb)
3446 return false;
3447 if (*other_bb == NULL)
3449 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3450 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3451 return false;
3452 else if (e->dest == e2->dest)
3453 *other_bb = e->dest;
3454 if (*other_bb == NULL)
3455 return false;
3457 if (e->dest == *other_bb)
3458 other_edge_seen = true;
3459 else if (backward)
3460 return false;
3462 if (*other_bb == NULL || !other_edge_seen)
3463 return false;
3465 else if (single_succ (bb) != *other_bb)
3466 return false;
3468 /* Now check all PHIs of *OTHER_BB. */
3469 e = find_edge (bb, *other_bb);
3470 e2 = find_edge (test_bb, *other_bb);
3471 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3473 gphi *phi = gsi.phi ();
3474 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3475 corresponding to BB and TEST_BB predecessor must be the same. */
3476 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3477 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3479 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3480 one of the PHIs should have the lhs of the last stmt in
3481 that block as PHI arg and that PHI should have 0 or 1
3482 corresponding to it in all other range test basic blocks
3483 considered. */
3484 if (!is_cond)
3486 if (gimple_phi_arg_def (phi, e->dest_idx)
3487 == gimple_assign_lhs (stmt)
3488 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3489 || integer_onep (gimple_phi_arg_def (phi,
3490 e2->dest_idx))))
3491 continue;
3493 else
3495 gimple *test_last = last_stmt (test_bb);
3496 if (gimple_code (test_last) != GIMPLE_COND
3497 && gimple_phi_arg_def (phi, e2->dest_idx)
3498 == gimple_assign_lhs (test_last)
3499 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3500 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3501 continue;
3504 return false;
3507 return true;
3510 /* Return true if BB doesn't have side-effects that would disallow
3511 range test optimization, all SSA_NAMEs set in the bb are consumed
3512 in the bb and there are no PHIs. */
3514 static bool
3515 no_side_effect_bb (basic_block bb)
3517 gimple_stmt_iterator gsi;
3518 gimple *last;
3520 if (!gimple_seq_empty_p (phi_nodes (bb)))
3521 return false;
3522 last = last_stmt (bb);
3523 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3525 gimple *stmt = gsi_stmt (gsi);
3526 tree lhs;
3527 imm_use_iterator imm_iter;
3528 use_operand_p use_p;
3530 if (is_gimple_debug (stmt))
3531 continue;
3532 if (gimple_has_side_effects (stmt))
3533 return false;
3534 if (stmt == last)
3535 return true;
3536 if (!is_gimple_assign (stmt))
3537 return false;
3538 lhs = gimple_assign_lhs (stmt);
3539 if (TREE_CODE (lhs) != SSA_NAME)
3540 return false;
3541 if (gimple_assign_rhs_could_trap_p (stmt))
3542 return false;
3543 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3545 gimple *use_stmt = USE_STMT (use_p);
3546 if (is_gimple_debug (use_stmt))
3547 continue;
3548 if (gimple_bb (use_stmt) != bb)
3549 return false;
3552 return false;
3555 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3556 return true and fill in *OPS recursively. */
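/* Illustrative sketch (assumed example, not part of the upstream sources):
   for VAR defined as _5 = _1 | _2, where _2 = _3 | _4 and each name has a
   single use, get_ops (_5, BIT_IOR_EXPR, ...) pushes entries for _1, _3
   and _4 into *OPS.  */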
3558 static bool
3559 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3560 struct loop *loop)
3562 gimple *stmt = SSA_NAME_DEF_STMT (var);
3563 tree rhs[2];
3564 int i;
3566 if (!is_reassociable_op (stmt, code, loop))
3567 return false;
3569 rhs[0] = gimple_assign_rhs1 (stmt);
3570 rhs[1] = gimple_assign_rhs2 (stmt);
3571 gimple_set_visited (stmt, true);
3572 for (i = 0; i < 2; i++)
3573 if (TREE_CODE (rhs[i]) == SSA_NAME
3574 && !get_ops (rhs[i], code, ops, loop)
3575 && has_single_use (rhs[i]))
3577 operand_entry *oe = operand_entry_pool.allocate ();
3579 oe->op = rhs[i];
3580 oe->rank = code;
3581 oe->id = 0;
3582 oe->count = 1;
3583 oe->stmt_to_insert = NULL;
3584 ops->safe_push (oe);
3586 return true;
3589 /* Find the ops that were added by get_ops starting from VAR, see if
3590 they were changed during update_range_test and if yes, create new
3591 stmts. */
3593 static tree
3594 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3595 unsigned int *pidx, struct loop *loop)
3597 gimple *stmt = SSA_NAME_DEF_STMT (var);
3598 tree rhs[4];
3599 int i;
3601 if (!is_reassociable_op (stmt, code, loop))
3602 return NULL;
3604 rhs[0] = gimple_assign_rhs1 (stmt);
3605 rhs[1] = gimple_assign_rhs2 (stmt);
3606 rhs[2] = rhs[0];
3607 rhs[3] = rhs[1];
3608 for (i = 0; i < 2; i++)
3609 if (TREE_CODE (rhs[i]) == SSA_NAME)
3611 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3612 if (rhs[2 + i] == NULL_TREE)
3614 if (has_single_use (rhs[i]))
3615 rhs[2 + i] = ops[(*pidx)++]->op;
3616 else
3617 rhs[2 + i] = rhs[i];
3620 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3621 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3623 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3624 var = make_ssa_name (TREE_TYPE (var));
3625 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3626 rhs[2], rhs[3]);
3627 gimple_set_uid (g, gimple_uid (stmt));
3628 gimple_set_visited (g, true);
3629 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3631 return var;
3634 /* Structure to track the initial value passed to get_ops and
3635 the range in the ops vector for each basic block. */
3637 struct inter_bb_range_test_entry
3639 tree op;
3640 unsigned int first_idx, last_idx;
3643 /* Inter-bb range test optimization.
3645 Returns TRUE if a gimple conditional is optimized into a constant
3646 true/false, otherwise returns FALSE.
3648 This indicates to the caller that it should run a CFG cleanup pass
3649 once reassociation is completed. */
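/* Illustrative sketch (assumed example, not from the upstream sources):
   a chain of basic blocks such as
     if (x == 1) goto found;
     if (x == 2) goto found;
     if (x == 3) goto found;
   can be merged into a single (unsigned) (x - 1) <= 2 range test, after
   which the now redundant conditionals are folded to constant true/false
   and the caller is told to run CFG cleanup.  */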
3651 static bool
3652 maybe_optimize_range_tests (gimple *stmt)
3654 basic_block first_bb = gimple_bb (stmt);
3655 basic_block last_bb = first_bb;
3656 basic_block other_bb = NULL;
3657 basic_block bb;
3658 edge_iterator ei;
3659 edge e;
3660 auto_vec<operand_entry *> ops;
3661 auto_vec<inter_bb_range_test_entry> bbinfo;
3662 bool any_changes = false;
3663 bool cfg_cleanup_needed = false;
3665 /* Consider only basic blocks that end with GIMPLE_COND or
3666 a cast statement satisfying final_range_test_p. All
3667 but the last bb in the first_bb .. last_bb range
3668 should end with GIMPLE_COND. */
3669 if (gimple_code (stmt) == GIMPLE_COND)
3671 if (EDGE_COUNT (first_bb->succs) != 2)
3672 return cfg_cleanup_needed;
3674 else if (final_range_test_p (stmt))
3675 other_bb = single_succ (first_bb);
3676 else
3677 return cfg_cleanup_needed;
3679 if (stmt_could_throw_p (stmt))
3680 return cfg_cleanup_needed;
3682 /* As relative ordering of post-dominator sons isn't fixed,
3683 maybe_optimize_range_tests can be called first on any
3684 bb in the range we want to optimize. So, start searching
3685 backwards, if first_bb can be set to a predecessor. */
3686 while (single_pred_p (first_bb))
3688 basic_block pred_bb = single_pred (first_bb);
3689 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3690 break;
3691 if (!no_side_effect_bb (first_bb))
3692 break;
3693 first_bb = pred_bb;
3695 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3696 Before starting forward search in last_bb successors, find
3697 out the other_bb. */
3698 if (first_bb == last_bb)
3700 other_bb = NULL;
3701 /* As non-GIMPLE_COND last stmt always terminates the range,
3702 if forward search didn't discover anything, just give up. */
3703 if (gimple_code (stmt) != GIMPLE_COND)
3704 return cfg_cleanup_needed;
3705 /* Look at both successors. Either it ends with a GIMPLE_COND
3706 and satisfies suitable_cond_bb, or ends with a cast and
3707 other_bb is that cast's successor. */
3708 FOR_EACH_EDGE (e, ei, first_bb->succs)
3709 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3710 || e->dest == first_bb)
3711 return cfg_cleanup_needed;
3712 else if (single_pred_p (e->dest))
3714 stmt = last_stmt (e->dest);
3715 if (stmt
3716 && gimple_code (stmt) == GIMPLE_COND
3717 && EDGE_COUNT (e->dest->succs) == 2)
3719 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3720 break;
3721 else
3722 other_bb = NULL;
3724 else if (stmt
3725 && final_range_test_p (stmt)
3726 && find_edge (first_bb, single_succ (e->dest)))
3728 other_bb = single_succ (e->dest);
3729 if (other_bb == first_bb)
3730 other_bb = NULL;
3733 if (other_bb == NULL)
3734 return cfg_cleanup_needed;
3736 /* Now do the forward search, moving last_bb to successor bbs
3737 that aren't other_bb. */
3738 while (EDGE_COUNT (last_bb->succs) == 2)
3740 FOR_EACH_EDGE (e, ei, last_bb->succs)
3741 if (e->dest != other_bb)
3742 break;
3743 if (e == NULL)
3744 break;
3745 if (!single_pred_p (e->dest))
3746 break;
3747 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3748 break;
3749 if (!no_side_effect_bb (e->dest))
3750 break;
3751 last_bb = e->dest;
3753 if (first_bb == last_bb)
3754 return cfg_cleanup_needed;
3755 /* Here basic blocks first_bb through last_bb's predecessor
3756 end with GIMPLE_COND, all of them have one of the edges to
3757 other_bb and another to another block in the range,
3758 all blocks except first_bb don't have side-effects and
3759 last_bb ends with either GIMPLE_COND, or cast satisfying
3760 final_range_test_p. */
3761 for (bb = last_bb; ; bb = single_pred (bb))
3763 enum tree_code code;
3764 tree lhs, rhs;
3765 inter_bb_range_test_entry bb_ent;
3767 bb_ent.op = NULL_TREE;
3768 bb_ent.first_idx = ops.length ();
3769 bb_ent.last_idx = bb_ent.first_idx;
3770 e = find_edge (bb, other_bb);
3771 stmt = last_stmt (bb);
3772 gimple_set_visited (stmt, true);
3773 if (gimple_code (stmt) != GIMPLE_COND)
3775 use_operand_p use_p;
3776 gimple *phi;
3777 edge e2;
3778 unsigned int d;
3780 lhs = gimple_assign_lhs (stmt);
3781 rhs = gimple_assign_rhs1 (stmt);
3782 gcc_assert (bb == last_bb);
3784 /* stmt is
3785 _123 = (int) _234;
3787 _234 = a_2(D) == 2;
3789 followed by:
3790 <bb M>:
3791 # _345 = PHI <_123(N), 1(...), 1(...)>
3793 or 0 instead of 1. If it is 0, the _234
3794 range test is anded together with all the
3795 other range tests, if it is 1, it is ored with
3796 them. */
3797 single_imm_use (lhs, &use_p, &phi);
3798 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3799 e2 = find_edge (first_bb, other_bb);
3800 d = e2->dest_idx;
3801 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3802 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3803 code = BIT_AND_EXPR;
3804 else
3806 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3807 code = BIT_IOR_EXPR;
3810 /* If _234 SSA_NAME_DEF_STMT is
3811 _234 = _567 | _789;
3812 (or &, corresponding to 1/0 in the phi arguments),
3813 push into ops the individual range test arguments
3814 of the bitwise or resp. and, recursively. */
3815 if (TREE_CODE (rhs) == SSA_NAME
3816 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3817 != tcc_comparison)
3818 && !get_ops (rhs, code, &ops,
3819 loop_containing_stmt (stmt))
3820 && has_single_use (rhs))
3822 /* Otherwise, push the _234 range test itself. */
3823 operand_entry *oe = operand_entry_pool.allocate ();
3825 oe->op = rhs;
3826 oe->rank = code;
3827 oe->id = 0;
3828 oe->count = 1;
3829 oe->stmt_to_insert = NULL;
3830 ops.safe_push (oe);
3831 bb_ent.last_idx++;
3832 bb_ent.op = rhs;
3834 else if (is_gimple_assign (stmt)
3835 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3836 == tcc_comparison)
3837 && !get_ops (lhs, code, &ops,
3838 loop_containing_stmt (stmt))
3839 && has_single_use (lhs))
3841 operand_entry *oe = operand_entry_pool.allocate ();
3842 oe->op = lhs;
3843 oe->rank = code;
3844 oe->id = 0;
3845 oe->count = 1;
3846 ops.safe_push (oe);
3847 bb_ent.last_idx++;
3848 bb_ent.op = lhs;
3850 else
3852 bb_ent.last_idx = ops.length ();
3853 bb_ent.op = rhs;
3855 bbinfo.safe_push (bb_ent);
3856 continue;
3858 /* Otherwise stmt is GIMPLE_COND. */
3859 code = gimple_cond_code (stmt);
3860 lhs = gimple_cond_lhs (stmt);
3861 rhs = gimple_cond_rhs (stmt);
3862 if (TREE_CODE (lhs) == SSA_NAME
3863 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3864 && ((code != EQ_EXPR && code != NE_EXPR)
3865 || rhs != boolean_false_node
3866 /* Either push into ops the individual bitwise
3867 or resp. and operands, depending on which
3868 edge is other_bb. */
3869 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3870 ^ (code == EQ_EXPR))
3871 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3872 loop_containing_stmt (stmt))))
3874 /* Or push the GIMPLE_COND stmt itself. */
3875 operand_entry *oe = operand_entry_pool.allocate ();
3877 oe->op = NULL;
3878 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3879 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3880 /* oe->op = NULL signals that there is no SSA_NAME
3881 for the range test, and oe->id instead is the
3882 basic block number at whose end the GIMPLE_COND
3883 is. */
3884 oe->id = bb->index;
3885 oe->count = 1;
3886 oe->stmt_to_insert = NULL;
3887 ops.safe_push (oe);
3888 bb_ent.op = NULL;
3889 bb_ent.last_idx++;
3891 else if (ops.length () > bb_ent.first_idx)
3893 bb_ent.op = lhs;
3894 bb_ent.last_idx = ops.length ();
3896 bbinfo.safe_push (bb_ent);
3897 if (bb == first_bb)
3898 break;
3900 if (ops.length () > 1)
3901 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3902 if (any_changes)
3904 unsigned int idx, max_idx = 0;
3905 /* update_ops relies on the has_single_use predicates returning the
3906 same values as they did during get_ops earlier. Additionally it
3907 never removes statements, only adds new ones, and it should walk
3908 from the single imm use and check the predicate already before
3909 making those changes.
3910 On the other hand, the handling of GIMPLE_COND directly can turn
3911 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3912 it needs to be done in a separate loop afterwards. */
3913 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3915 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3916 && bbinfo[idx].op != NULL_TREE)
3918 tree new_op;
3920 max_idx = idx;
3921 stmt = last_stmt (bb);
3922 new_op = update_ops (bbinfo[idx].op,
3923 (enum tree_code)
3924 ops[bbinfo[idx].first_idx]->rank,
3925 ops, &bbinfo[idx].first_idx,
3926 loop_containing_stmt (stmt));
3927 if (new_op == NULL_TREE)
3929 gcc_assert (bb == last_bb);
3930 new_op = ops[bbinfo[idx].first_idx++]->op;
3932 if (bbinfo[idx].op != new_op)
3934 imm_use_iterator iter;
3935 use_operand_p use_p;
3936 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3938 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3939 if (is_gimple_debug (use_stmt))
3940 continue;
3941 else if (gimple_code (use_stmt) == GIMPLE_COND
3942 || gimple_code (use_stmt) == GIMPLE_PHI)
3943 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3944 SET_USE (use_p, new_op);
3945 else if ((is_gimple_assign (use_stmt)
3946 && (TREE_CODE_CLASS
3947 (gimple_assign_rhs_code (use_stmt))
3948 == tcc_comparison)))
3949 cast_or_tcc_cmp_stmt = use_stmt;
3950 else if (gimple_assign_cast_p (use_stmt))
3951 cast_or_tcc_cmp_stmt = use_stmt;
3952 else
3953 gcc_unreachable ();
3955 if (cast_or_tcc_cmp_stmt)
3957 gcc_assert (bb == last_bb);
3958 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3959 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3960 enum tree_code rhs_code
3961 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3962 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3963 : CONVERT_EXPR;
3964 gassign *g;
3965 if (is_gimple_min_invariant (new_op))
3967 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3968 g = gimple_build_assign (new_lhs, new_op);
3970 else
3971 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3972 gimple_stmt_iterator gsi
3973 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3974 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3975 gimple_set_visited (g, true);
3976 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3977 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3978 if (is_gimple_debug (use_stmt))
3979 continue;
3980 else if (gimple_code (use_stmt) == GIMPLE_COND
3981 || gimple_code (use_stmt) == GIMPLE_PHI)
3982 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3983 SET_USE (use_p, new_lhs);
3984 else
3985 gcc_unreachable ();
3989 if (bb == first_bb)
3990 break;
3992 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3994 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3995 && bbinfo[idx].op == NULL_TREE
3996 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3998 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4000 if (idx > max_idx)
4001 max_idx = idx;
4003 /* If we collapse the conditional to a true/false
4004 condition, then bubble that knowledge up to our caller. */
4005 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4007 gimple_cond_make_false (cond_stmt);
4008 cfg_cleanup_needed = true;
4010 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4012 gimple_cond_make_true (cond_stmt);
4013 cfg_cleanup_needed = true;
4015 else
4017 gimple_cond_set_code (cond_stmt, NE_EXPR);
4018 gimple_cond_set_lhs (cond_stmt,
4019 ops[bbinfo[idx].first_idx]->op);
4020 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4022 update_stmt (cond_stmt);
4024 if (bb == first_bb)
4025 break;
4028 /* The above changes could result in basic blocks after the first
4029 modified one, up to and including last_bb, being executed even if
4030 they would not be in the original program. If the value ranges of
4031 assignment lhs' in those bbs were dependent on the conditions
4032 guarding those basic blocks which now can change, the VRs might
4033 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4034 are only used within the same bb, it should not be a big deal if
4035 we just reset all the VRs in those bbs. See PR68671. */
4036 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4037 reset_flow_sensitive_info_in_bb (bb);
4039 return cfg_cleanup_needed;
4042 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4043 of STMT in its operands. This is also known as a "destructive
4044 update" operation. */
4046 static bool
4047 is_phi_for_stmt (gimple *stmt, tree operand)
4049 gimple *def_stmt;
4050 gphi *def_phi;
4051 tree lhs;
4052 use_operand_p arg_p;
4053 ssa_op_iter i;
4055 if (TREE_CODE (operand) != SSA_NAME)
4056 return false;
4058 lhs = gimple_assign_lhs (stmt);
4060 def_stmt = SSA_NAME_DEF_STMT (operand);
4061 def_phi = dyn_cast <gphi *> (def_stmt);
4062 if (!def_phi)
4063 return false;
4065 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4066 if (lhs == USE_FROM_PTR (arg_p))
4067 return true;
4068 return false;
4071 /* Remove def stmt of VAR if VAR has zero uses and recurse
4072 on rhs1 operand if so. */
4074 static void
4075 remove_visited_stmt_chain (tree var)
4077 gimple *stmt;
4078 gimple_stmt_iterator gsi;
4080 while (1)
4082 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4083 return;
4084 stmt = SSA_NAME_DEF_STMT (var);
4085 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4087 var = gimple_assign_rhs1 (stmt);
4088 gsi = gsi_for_stmt (stmt);
4089 reassoc_remove_stmt (&gsi);
4090 release_defs (stmt);
4092 else
4093 return;
4097 /* This function checks three consecutive operands in
4098 the passed operands vector OPS starting from OPINDEX and
4099 swaps two operands if it is profitable for the binary operation
4100 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4102 We pair ops with the same rank if possible.
4104 The alternative we try is to see if STMT is a destructive
4105 update style statement, which is like:
4106 b = phi (a, ...)
4107 a = c + b;
4108 In that case, we want to use the destructive update form to
4109 expose the possible vectorizer sum reduction opportunity.
4110 In that case, the third operand will be the phi node. This
4111 check is not performed if STMT is null.
4113 We could, of course, try to be better as noted above, and do a
4114 lot of work to try to find these opportunities in >3 operand
4115 cases, but it is unlikely to be worth it. */
4117 static void
4118 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4119 unsigned int opindex, gimple *stmt)
4121 operand_entry *oe1, *oe2, *oe3;
4123 oe1 = ops[opindex];
4124 oe2 = ops[opindex + 1];
4125 oe3 = ops[opindex + 2];
4127 if ((oe1->rank == oe2->rank
4128 && oe2->rank != oe3->rank)
4129 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4130 && !is_phi_for_stmt (stmt, oe1->op)
4131 && !is_phi_for_stmt (stmt, oe2->op)))
4132 std::swap (*oe1, *oe3);
4133 else if ((oe1->rank == oe3->rank
4134 && oe2->rank != oe3->rank)
4135 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4136 && !is_phi_for_stmt (stmt, oe1->op)
4137 && !is_phi_for_stmt (stmt, oe3->op)))
4138 std::swap (*oe1, *oe2);
4141 /* If STMT dominates the definition of RHS1 or RHS2, return the later
4142 of those two definitions, otherwise return STMT. */
4144 static inline gimple *
4145 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4147 if (TREE_CODE (rhs1) == SSA_NAME
4148 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4149 stmt = SSA_NAME_DEF_STMT (rhs1);
4150 if (TREE_CODE (rhs2) == SSA_NAME
4151 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4152 stmt = SSA_NAME_DEF_STMT (rhs2);
4153 return stmt;
4156 /* If the stmt that defines operand has to be inserted, insert it
4157 before the use. */
4158 static void
4159 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4161 gcc_assert (is_gimple_assign (stmt_to_insert));
4162 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4163 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4164 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4165 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4166 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4168 /* If the insert point is not stmt, then insert_point would be
4169 the point where operand rhs1 or rhs2 is defined. In this case,
4170 stmt_to_insert has to be inserted afterwards. This would
4171 only happen when the stmt insertion point is flexible. */
4172 if (stmt == insert_point)
4173 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4174 else
4175 insert_stmt_after (stmt_to_insert, insert_point);
4179 /* Recursively rewrite our linearized statements so that the operators
4180 match those in OPS[OPINDEX], putting the computation in rank
4181 order. Return new lhs. */
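/* For illustration (names are made up): if the linearized chain is
t1 = a + b;  t2 = t1 + c;  t3 = t2 + d;
and OPS (already sorted by rank) holds { o0, o1, o2, o3 }, then the
innermost statement is rewritten to combine o2 and o3, the next one to
combine that result with o1, and the root statement to combine its
result with o0. */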
4183 static tree
4184 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4185 vec<operand_entry *> ops, bool changed)
4187 tree rhs1 = gimple_assign_rhs1 (stmt);
4188 tree rhs2 = gimple_assign_rhs2 (stmt);
4189 tree lhs = gimple_assign_lhs (stmt);
4190 operand_entry *oe;
4192 /* The final recursion case for this function is that you have
4193 exactly two operations left.
4194 If we had exactly one op in the entire list to start with, we
4195 would have never called this function, and the tail recursion
4196 rewrites them one at a time. */
4197 if (opindex + 2 == ops.length ())
4199 operand_entry *oe1, *oe2;
4201 oe1 = ops[opindex];
4202 oe2 = ops[opindex + 1];
4204 if (rhs1 != oe1->op || rhs2 != oe2->op)
4206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4207 unsigned int uid = gimple_uid (stmt);
4209 if (dump_file && (dump_flags & TDF_DETAILS))
4211 fprintf (dump_file, "Transforming ");
4212 print_gimple_stmt (dump_file, stmt, 0, 0);
4215 /* If the stmt that defines operand has to be inserted, insert it
4216 before the use. */
4217 if (oe1->stmt_to_insert)
4218 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4219 if (oe2->stmt_to_insert)
4220 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4221 /* Even when changed is false, reassociation could have e.g. removed
4222 some redundant operations, so unless we are just swapping the
4223 arguments or unless there is no change at all (then we just
4224 return lhs), force creation of a new SSA_NAME. */
4225 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4227 gimple *insert_point
4228 = find_insert_point (stmt, oe1->op, oe2->op);
4229 lhs = make_ssa_name (TREE_TYPE (lhs));
4230 stmt
4231 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4232 oe1->op, oe2->op);
4233 gimple_set_uid (stmt, uid);
4234 gimple_set_visited (stmt, true);
4235 if (insert_point == gsi_stmt (gsi))
4236 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4237 else
4238 insert_stmt_after (stmt, insert_point);
4240 else
4242 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4243 == stmt);
4244 gimple_assign_set_rhs1 (stmt, oe1->op);
4245 gimple_assign_set_rhs2 (stmt, oe2->op);
4246 update_stmt (stmt);
4249 if (rhs1 != oe1->op && rhs1 != oe2->op)
4250 remove_visited_stmt_chain (rhs1);
4252 if (dump_file && (dump_flags & TDF_DETAILS))
4254 fprintf (dump_file, " into ");
4255 print_gimple_stmt (dump_file, stmt, 0, 0);
4258 return lhs;
4261 /* If we hit here, we should have 3 or more ops left. */
4262 gcc_assert (opindex + 2 < ops.length ());
4264 /* Rewrite the next operator. */
4265 oe = ops[opindex];
4267 /* If the stmt that defines operand has to be inserted, insert it
4268 before the use. */
4269 if (oe->stmt_to_insert)
4270 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4272 /* Recurse on the LHS of the binary operator, which is guaranteed to
4273 be the non-leaf side. */
4274 tree new_rhs1
4275 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4276 changed || oe->op != rhs2);
4278 if (oe->op != rhs2 || new_rhs1 != rhs1)
4280 if (dump_file && (dump_flags & TDF_DETAILS))
4282 fprintf (dump_file, "Transforming ");
4283 print_gimple_stmt (dump_file, stmt, 0, 0);
4286 /* If changed is false, this is either opindex == 0
4287 or all outer rhs2's were equal to corresponding oe->op,
4288 and powi_result is NULL.
4289 That means lhs is equivalent before and after reassociation.
4290 Otherwise ensure the old lhs SSA_NAME is not reused and
4291 create a new stmt as well, so that any debug stmts will be
4292 properly adjusted. */
4293 if (changed)
4295 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4296 unsigned int uid = gimple_uid (stmt);
4297 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4299 lhs = make_ssa_name (TREE_TYPE (lhs));
4300 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4301 new_rhs1, oe->op);
4302 gimple_set_uid (stmt, uid);
4303 gimple_set_visited (stmt, true);
4304 if (insert_point == gsi_stmt (gsi))
4305 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4306 else
4307 insert_stmt_after (stmt, insert_point);
4309 else
4311 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4312 == stmt);
4313 gimple_assign_set_rhs1 (stmt, new_rhs1);
4314 gimple_assign_set_rhs2 (stmt, oe->op);
4315 update_stmt (stmt);
4318 if (dump_file && (dump_flags & TDF_DETAILS))
4320 fprintf (dump_file, " into ");
4321 print_gimple_stmt (dump_file, stmt, 0, 0);
4324 return lhs;
4327 /* Find out how many cycles we need to compute a statement chain.
4328 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4329 maximum number of independent statements we may execute per cycle. */
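/* For example, with OPS_NUM == 16 and CPU_WIDTH == 4 the code below
gives 16 / (2 * 4) == 2 full-width cycles, leaves 16 - 2 * 4 == 8
operands, and exact_log2 (8) == 3 further halving cycles, i.e. 5
cycles in total. */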
4331 static int
4332 get_required_cycles (int ops_num, int cpu_width)
4334 int res;
4335 int elog;
4336 unsigned int rest;
4338 /* While we have more than 2 * cpu_width operands
4339 we may reduce the number of operands by cpu_width
4340 per cycle. */
4341 res = ops_num / (2 * cpu_width);
4343 /* The remaining operand count may be halved each cycle
4344 until we have only one operand. */
4345 rest = (unsigned)(ops_num - res * cpu_width);
4346 elog = exact_log2 (rest);
4347 if (elog >= 0)
4348 res += elog;
4349 else
4350 res += floor_log2 (rest) + 1;
4352 return res;
4355 /* Returns an optimal number of registers to use for computation of
4356 given statements. */
4358 static int
4359 get_reassociation_width (int ops_num, enum tree_code opc,
4360 machine_mode mode)
4362 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4363 int width;
4364 int width_min;
4365 int cycles_best;
4367 if (param_width > 0)
4368 width = param_width;
4369 else
4370 width = targetm.sched.reassociation_width (opc, mode);
4372 if (width == 1)
4373 return width;
4375 /* Get the minimal time required for sequence computation. */
4376 cycles_best = get_required_cycles (ops_num, width);
4378 /* Check if we may use a smaller width and still compute the sequence
4379 in the same time. That allows us to reduce register usage.
4380 get_required_cycles is monotonically increasing as the width decreases,
4381 so we can perform a binary search for the minimal width that still
4382 results in the optimal cycle count. */
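/* E.g. for ops_num == 6, get_required_cycles gives 3 cycles both for
width 4 and for width 2 (but 5 cycles for width 1), so starting from a
width of 4 the search below would settle on a width of 2. */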
4383 width_min = 1;
4384 while (width > width_min)
4386 int width_mid = (width + width_min) / 2;
4388 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4389 width = width_mid;
4390 else if (width_min < width_mid)
4391 width_min = width_mid;
4392 else
4393 break;
4396 return width;
4399 /* Recursively rewrite our linearized statements so that the operators
4400 match those in OPS[OPINDEX], putting the computation in rank
4401 order and trying to allow operations to be executed in
4402 parallel. */
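/* As an illustration (made-up names), with four operands and WIDTH == 2
the serial chain
t1 = a + b;  t2 = t1 + c;  t3 = t2 + d;
is re-expressed so that two of the additions are independent:
s1 = a + b;  s2 = c + d;  t3 = s1 + s2;
allowing them to issue in the same cycle. */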
4404 static void
4405 rewrite_expr_tree_parallel (gassign *stmt, int width,
4406 vec<operand_entry *> ops)
4408 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4409 int op_num = ops.length ();
4410 gcc_assert (op_num > 0);
4411 int stmt_num = op_num - 1;
4412 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4413 int op_index = op_num - 1;
4414 int stmt_index = 0;
4415 int ready_stmts_end = 0;
4416 int i = 0;
4417 gimple *stmt1 = NULL, *stmt2 = NULL;
4418 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4420 /* We start expression rewriting from the top statements.
4421 So, in this loop we create a full list of statements
4422 we will work with. */
4423 stmts[stmt_num - 1] = stmt;
4424 for (i = stmt_num - 2; i >= 0; i--)
4425 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4427 for (i = 0; i < stmt_num; i++)
4429 tree op1, op2;
4431 /* Determine whether we should use results of
4432 already handled statements or not. */
4433 if (ready_stmts_end == 0
4434 && (i - stmt_index >= width || op_index < 1))
4435 ready_stmts_end = i;
4437 /* Now we choose operands for the next statement. A nonzero
4438 value in ready_stmts_end means here that we should use
4439 the result of already generated statements as a new operand. */
4440 if (ready_stmts_end > 0)
4442 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4443 if (ready_stmts_end > stmt_index)
4444 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4445 else if (op_index >= 0)
4447 operand_entry *oe = ops[op_index--];
4448 stmt2 = oe->stmt_to_insert;
4449 op2 = oe->op;
4451 else
4453 gcc_assert (stmt_index < i);
4454 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4457 if (stmt_index >= ready_stmts_end)
4458 ready_stmts_end = 0;
4460 else
4462 if (op_index > 1)
4463 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4464 operand_entry *oe2 = ops[op_index--];
4465 operand_entry *oe1 = ops[op_index--];
4466 op2 = oe2->op;
4467 stmt2 = oe2->stmt_to_insert;
4468 op1 = oe1->op;
4469 stmt1 = oe1->stmt_to_insert;
4472 /* If we emit the last statement then we should put
4473 operands into the last statement. It will also
4474 break the loop. */
4475 if (op_index < 0 && stmt_index == i)
4476 i = stmt_num - 1;
4478 if (dump_file && (dump_flags & TDF_DETAILS))
4480 fprintf (dump_file, "Transforming ");
4481 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4484 /* If the stmt that defines operand has to be inserted, insert it
4485 before the use. */
4486 if (stmt1)
4487 insert_stmt_before_use (stmts[i], stmt1);
4488 if (stmt2)
4489 insert_stmt_before_use (stmts[i], stmt2);
4490 stmt1 = stmt2 = NULL;
4492 /* We keep the original statement only for the last one. All
4493 others are recreated. */
4494 if (i == stmt_num - 1)
4496 gimple_assign_set_rhs1 (stmts[i], op1);
4497 gimple_assign_set_rhs2 (stmts[i], op2);
4498 update_stmt (stmts[i]);
4500 else
4502 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4504 if (dump_file && (dump_flags & TDF_DETAILS))
4506 fprintf (dump_file, " into ");
4507 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4511 remove_visited_stmt_chain (last_rhs1);
4514 /* Transform STMT, which is really (A + B) + (C + D), into the left
4515 linear form, ((A+B)+C)+D.
4516 Recurse on D if necessary. */
4518 static void
4519 linearize_expr (gimple *stmt)
4521 gimple_stmt_iterator gsi;
4522 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4523 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4524 gimple *oldbinrhs = binrhs;
4525 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4526 gimple *newbinrhs = NULL;
4527 struct loop *loop = loop_containing_stmt (stmt);
4528 tree lhs = gimple_assign_lhs (stmt);
4530 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4531 && is_reassociable_op (binrhs, rhscode, loop));
4533 gsi = gsi_for_stmt (stmt);
4535 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4536 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4537 gimple_assign_rhs_code (binrhs),
4538 gimple_assign_lhs (binlhs),
4539 gimple_assign_rhs2 (binrhs));
4540 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4541 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4542 gimple_set_uid (binrhs, gimple_uid (stmt));
4544 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4545 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4547 if (dump_file && (dump_flags & TDF_DETAILS))
4549 fprintf (dump_file, "Linearized: ");
4550 print_gimple_stmt (dump_file, stmt, 0, 0);
4553 reassociate_stats.linearized++;
4554 update_stmt (stmt);
4556 gsi = gsi_for_stmt (oldbinrhs);
4557 reassoc_remove_stmt (&gsi);
4558 release_defs (oldbinrhs);
4560 gimple_set_visited (stmt, true);
4561 gimple_set_visited (binlhs, true);
4562 gimple_set_visited (binrhs, true);
4564 /* Tail recurse on the new rhs if it still needs reassociation. */
4565 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4566 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4567 want to change the algorithm while converting to tuples. */
4568 linearize_expr (stmt);
4571 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4572 it. Otherwise, return NULL. */
4574 static gimple *
4575 get_single_immediate_use (tree lhs)
4577 use_operand_p immuse;
4578 gimple *immusestmt;
4580 if (TREE_CODE (lhs) == SSA_NAME
4581 && single_imm_use (lhs, &immuse, &immusestmt)
4582 && is_gimple_assign (immusestmt))
4583 return immusestmt;
4585 return NULL;
4588 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4589 representing the negated value. Insertions of any necessary
4590 instructions go before GSI.
4591 This function is recursive in that, if you hand it "a_5" as the
4592 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4593 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4595 static tree
4596 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4598 gimple *negatedefstmt = NULL;
4599 tree resultofnegate;
4600 gimple_stmt_iterator gsi;
4601 unsigned int uid;
4603 /* If we are trying to negate a name, defined by an add, negate the
4604 add operands instead. */
4605 if (TREE_CODE (tonegate) == SSA_NAME)
4606 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4607 if (TREE_CODE (tonegate) == SSA_NAME
4608 && is_gimple_assign (negatedefstmt)
4609 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4610 && has_single_use (gimple_assign_lhs (negatedefstmt))
4611 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4613 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4614 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4615 tree lhs = gimple_assign_lhs (negatedefstmt);
4616 gimple *g;
4618 gsi = gsi_for_stmt (negatedefstmt);
4619 rhs1 = negate_value (rhs1, &gsi);
4621 gsi = gsi_for_stmt (negatedefstmt);
4622 rhs2 = negate_value (rhs2, &gsi);
4624 gsi = gsi_for_stmt (negatedefstmt);
4625 lhs = make_ssa_name (TREE_TYPE (lhs));
4626 gimple_set_visited (negatedefstmt, true);
4627 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4628 gimple_set_uid (g, gimple_uid (negatedefstmt));
4629 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4630 return lhs;
4633 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4634 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4635 NULL_TREE, true, GSI_SAME_STMT);
4636 gsi = *gsip;
4637 uid = gimple_uid (gsi_stmt (gsi));
4638 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4640 gimple *stmt = gsi_stmt (gsi);
4641 if (gimple_uid (stmt) != 0)
4642 break;
4643 gimple_set_uid (stmt, uid);
4645 return resultofnegate;
4648 /* Return true if we should break up the subtract in STMT into an add
4649 with negate. This is true when the subtract operands are really
4650 adds, or the subtract itself is used in an add expression. In
4651 either case, breaking up the subtract into an add with negate
4652 exposes the adds to reassociation. */
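/* A small illustration with invented SSA names:
c_2 = a_1 - b_3;
e_5 = c_2 + d_4;
Here c_2 has a single immediate use in an addition, so we return true
and the subtract is later rewritten as c_2 = a_1 + -b_3. */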
4654 static bool
4655 should_break_up_subtract (gimple *stmt)
4657 tree lhs = gimple_assign_lhs (stmt);
4658 tree binlhs = gimple_assign_rhs1 (stmt);
4659 tree binrhs = gimple_assign_rhs2 (stmt);
4660 gimple *immusestmt;
4661 struct loop *loop = loop_containing_stmt (stmt);
4663 if (TREE_CODE (binlhs) == SSA_NAME
4664 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4665 return true;
4667 if (TREE_CODE (binrhs) == SSA_NAME
4668 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4669 return true;
4671 if (TREE_CODE (lhs) == SSA_NAME
4672 && (immusestmt = get_single_immediate_use (lhs))
4673 && is_gimple_assign (immusestmt)
4674 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4675 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4676 return true;
4677 return false;
4680 /* Transform STMT from A - B into A + -B. */
4682 static void
4683 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4685 tree rhs1 = gimple_assign_rhs1 (stmt);
4686 tree rhs2 = gimple_assign_rhs2 (stmt);
4688 if (dump_file && (dump_flags & TDF_DETAILS))
4690 fprintf (dump_file, "Breaking up subtract ");
4691 print_gimple_stmt (dump_file, stmt, 0, 0);
4694 rhs2 = negate_value (rhs2, gsip);
4695 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4696 update_stmt (stmt);
4699 /* Determine whether STMT is a builtin call that raises an SSA name
4700 to an integer power and has only one use. If so, and this is early
4701 reassociation and unsafe math optimizations are permitted, place
4702 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4703 If any of these conditions does not hold, return FALSE. */
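/* For instance, a call pow (x_1, 4.0) (assuming errno-setting math is
disabled) yields *BASE == x_1 and *EXPONENT == 4, whereas
pow (x_1, 3.5) is rejected because the exponent is not an exact
integer, and powi (x_1, 1) is rejected because the exponent is
below 2. */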
4705 static bool
4706 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4708 tree arg1;
4709 REAL_VALUE_TYPE c, cint;
4711 switch (gimple_call_combined_fn (stmt))
4713 CASE_CFN_POW:
4714 if (flag_errno_math)
4715 return false;
4717 *base = gimple_call_arg (stmt, 0);
4718 arg1 = gimple_call_arg (stmt, 1);
4720 if (TREE_CODE (arg1) != REAL_CST)
4721 return false;
4723 c = TREE_REAL_CST (arg1);
4725 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4726 return false;
4728 *exponent = real_to_integer (&c);
4729 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4730 if (!real_identical (&c, &cint))
4731 return false;
4733 break;
4735 CASE_CFN_POWI:
4736 *base = gimple_call_arg (stmt, 0);
4737 arg1 = gimple_call_arg (stmt, 1);
4739 if (!tree_fits_shwi_p (arg1))
4740 return false;
4742 *exponent = tree_to_shwi (arg1);
4743 break;
4745 default:
4746 return false;
4749 /* Expanding negative exponents is generally unproductive, so we don't
4750 complicate matters with those. Exponents of zero and one should
4751 have been handled by expression folding. */
4752 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4753 return false;
4755 return true;
4758 /* Try to derive and add operand entry for OP to *OPS. Return false if
4759 unsuccessful. */
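/* Two illustrative cases (names invented): for a MULT_EXPR chain that
contains t_2 = __builtin_powi (x_1, 3), the base x_1 is recorded with a
repeat count of 3 (when the pass may insert powi calls and unsafe math
optimizations are enabled); for a chain containing t_4 = -y_3, the
entries y_3 and -1 are pushed instead of the negate's result. */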
4761 static bool
4762 try_special_add_to_ops (vec<operand_entry *> *ops,
4763 enum tree_code code,
4764 tree op, gimple* def_stmt)
4766 tree base = NULL_TREE;
4767 HOST_WIDE_INT exponent = 0;
4769 if (TREE_CODE (op) != SSA_NAME
4770 || ! has_single_use (op))
4771 return false;
4773 if (code == MULT_EXPR
4774 && reassoc_insert_powi_p
4775 && flag_unsafe_math_optimizations
4776 && is_gimple_call (def_stmt)
4777 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4779 add_repeat_to_ops_vec (ops, base, exponent);
4780 gimple_set_visited (def_stmt, true);
4781 return true;
4783 else if (code == MULT_EXPR
4784 && is_gimple_assign (def_stmt)
4785 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4786 && !HONOR_SNANS (TREE_TYPE (op))
4787 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4788 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4790 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4791 tree cst = build_minus_one_cst (TREE_TYPE (op));
4792 add_to_ops_vec (ops, rhs1);
4793 add_to_ops_vec (ops, cst);
4794 gimple_set_visited (def_stmt, true);
4795 return true;
4798 return false;
4801 /* Recursively linearize a binary expression that is the RHS of STMT.
4802 Place the operands of the expression tree in the vector named OPS. */
4804 static void
4805 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4806 bool is_associative, bool set_visited)
4808 tree binlhs = gimple_assign_rhs1 (stmt);
4809 tree binrhs = gimple_assign_rhs2 (stmt);
4810 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4811 bool binlhsisreassoc = false;
4812 bool binrhsisreassoc = false;
4813 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4814 struct loop *loop = loop_containing_stmt (stmt);
4816 if (set_visited)
4817 gimple_set_visited (stmt, true);
4819 if (TREE_CODE (binlhs) == SSA_NAME)
4821 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4822 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4823 && !stmt_could_throw_p (binlhsdef));
4826 if (TREE_CODE (binrhs) == SSA_NAME)
4828 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4829 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4830 && !stmt_could_throw_p (binrhsdef));
4833 /* If the LHS is not reassociable, but the RHS is, we need to swap
4834 them. If neither is reassociable, there is nothing we can do, so
4835 just put them in the ops vector. If the LHS is reassociable,
4836 linearize it. If both are reassociable, then linearize the RHS
4837 and the LHS. */
4839 if (!binlhsisreassoc)
4841 /* If this is not an associative operation, like division, give up. */
4842 if (!is_associative)
4844 add_to_ops_vec (ops, binrhs);
4845 return;
4848 if (!binrhsisreassoc)
4850 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4851 add_to_ops_vec (ops, binrhs);
4853 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4854 add_to_ops_vec (ops, binlhs);
4856 return;
4859 if (dump_file && (dump_flags & TDF_DETAILS))
4861 fprintf (dump_file, "swapping operands of ");
4862 print_gimple_stmt (dump_file, stmt, 0, 0);
4865 swap_ssa_operands (stmt,
4866 gimple_assign_rhs1_ptr (stmt),
4867 gimple_assign_rhs2_ptr (stmt));
4868 update_stmt (stmt);
4870 if (dump_file && (dump_flags & TDF_DETAILS))
4872 fprintf (dump_file, " is now ");
4873 print_gimple_stmt (dump_file, stmt, 0, 0);
4876 /* We want to make it so the lhs is always the reassociative op,
4877 so swap. */
4878 std::swap (binlhs, binrhs);
4880 else if (binrhsisreassoc)
4882 linearize_expr (stmt);
4883 binlhs = gimple_assign_rhs1 (stmt);
4884 binrhs = gimple_assign_rhs2 (stmt);
4887 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4888 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4889 rhscode, loop));
4890 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4891 is_associative, set_visited);
4893 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4894 add_to_ops_vec (ops, binrhs);
4897 /* Repropagate the negates back into subtracts, since no other pass
4898 currently does it. */
4900 static void
4901 repropagate_negates (void)
4903 unsigned int i = 0;
4904 tree negate;
4906 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4908 gimple *user = get_single_immediate_use (negate);
4910 if (!user || !is_gimple_assign (user))
4911 continue;
4913 /* The negate operand can be either operand of a PLUS_EXPR
4914 (it can be the LHS if the RHS is a constant for example).
4916 Force the negate operand to the RHS of the PLUS_EXPR, then
4917 transform the PLUS_EXPR into a MINUS_EXPR. */
4918 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4920 /* If the negated operand appears on the LHS of the
4921 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4922 to force the negated operand to the RHS of the PLUS_EXPR. */
4923 if (gimple_assign_rhs1 (user) == negate)
4925 swap_ssa_operands (user,
4926 gimple_assign_rhs1_ptr (user),
4927 gimple_assign_rhs2_ptr (user));
4930 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4931 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4932 if (gimple_assign_rhs2 (user) == negate)
4934 tree rhs1 = gimple_assign_rhs1 (user);
4935 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4936 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4937 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4938 update_stmt (user);
4941 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4943 if (gimple_assign_rhs1 (user) == negate)
4945 /* We have
4946 x = -a
4947 y = x - b
4948 which we transform into
4949 x = a + b
4950 y = -x .
4951 This pushes down the negate which we possibly can merge
4952 into some other operation, hence insert it into the
4953 plus_negates vector. */
4954 gimple *feed = SSA_NAME_DEF_STMT (negate);
4955 tree a = gimple_assign_rhs1 (feed);
4956 tree b = gimple_assign_rhs2 (user);
4957 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4958 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4959 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4960 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4961 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4962 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4963 user = gsi_stmt (gsi2);
4964 update_stmt (user);
4965 reassoc_remove_stmt (&gsi);
4966 release_defs (feed);
4967 plus_negates.safe_push (gimple_assign_lhs (user));
4969 else
4971 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4972 rid of one operation. */
4973 gimple *feed = SSA_NAME_DEF_STMT (negate);
4974 tree a = gimple_assign_rhs1 (feed);
4975 tree rhs1 = gimple_assign_rhs1 (user);
4976 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4977 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4978 update_stmt (gsi_stmt (gsi));
4984 /* Returns true if OP is of a type for which we can do reassociation.
4985 That is for integral or non-saturating fixed-point types, and for
4986 floating point type when associative-math is enabled. */
4988 static bool
4989 can_reassociate_p (tree op)
4991 tree type = TREE_TYPE (op);
4992 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4993 || NON_SAT_FIXED_POINT_TYPE_P (type)
4994 || (flag_associative_math && FLOAT_TYPE_P (type)))
4995 return true;
4996 return false;
4999 /* Break up subtract operations in block BB.
5001 We do this top down because we don't know whether the subtract is
5002 part of a possible chain of reassociation except at the top.
5004 IE given
5005 d = f + g
5006 c = a + e
5007 b = c - d
5008 q = b - r
5009 k = t - q
5011 we want to break up k = t - q, but we won't until we've transformed q
5012 = b - r, which won't be broken up until we transform b = c - d.
5014 En passant, clear the GIMPLE visited flag on every statement
5015 and set UIDs within each basic block. */
5017 static void
5018 break_up_subtract_bb (basic_block bb)
5020 gimple_stmt_iterator gsi;
5021 basic_block son;
5022 unsigned int uid = 1;
5024 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5026 gimple *stmt = gsi_stmt (gsi);
5027 gimple_set_visited (stmt, false);
5028 gimple_set_uid (stmt, uid++);
5030 if (!is_gimple_assign (stmt)
5031 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5032 continue;
5034 /* Look for simple gimple subtract operations. */
5035 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5037 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5038 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5039 continue;
5041 /* Check for a subtract used only in an addition. If this
5042 is the case, transform it into add of a negate for better
5043 reassociation. IE transform C = A-B into C = A + -B if C
5044 is only used in an addition. */
5045 if (should_break_up_subtract (stmt))
5046 break_up_subtract (stmt, &gsi);
5048 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5049 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5050 plus_negates.safe_push (gimple_assign_lhs (stmt));
5052 for (son = first_dom_son (CDI_DOMINATORS, bb);
5053 son;
5054 son = next_dom_son (CDI_DOMINATORS, son))
5055 break_up_subtract_bb (son);
5058 /* Used for repeated factor analysis. */
5059 struct repeat_factor
5061 /* An SSA name that occurs in a multiply chain. */
5062 tree factor;
5064 /* Cached rank of the factor. */
5065 unsigned rank;
5067 /* Number of occurrences of the factor in the chain. */
5068 HOST_WIDE_INT count;
5070 /* An SSA name representing the product of this factor and
5071 all factors appearing later in the repeated factor vector. */
5072 tree repr;
5076 static vec<repeat_factor> repeat_factor_vec;
5078 /* Used for sorting the repeat factor vector. Sort primarily by
5079 ascending occurrence count, secondarily by descending rank. */
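/* E.g. factors with (count, rank) pairs (1, 6), (3, 2) and (3, 9)
sort as (1, 6), (3, 9), (3, 2). */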
5081 static int
5082 compare_repeat_factors (const void *x1, const void *x2)
5084 const repeat_factor *rf1 = (const repeat_factor *) x1;
5085 const repeat_factor *rf2 = (const repeat_factor *) x2;
5087 if (rf1->count != rf2->count)
5088 return rf1->count - rf2->count;
5090 return rf2->rank - rf1->rank;
5093 /* Look for repeated operands in OPS in the multiply tree rooted at
5094 STMT. Replace them with an optimal sequence of multiplies and powi
5095 builtin calls, and remove the used operands from OPS. Return an
5096 SSA name representing the value of the replacement sequence. */
5098 static tree
5099 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5101 unsigned i, j, vec_len;
5102 int ii;
5103 operand_entry *oe;
5104 repeat_factor *rf1, *rf2;
5105 repeat_factor rfnew;
5106 tree result = NULL_TREE;
5107 tree target_ssa, iter_result;
5108 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5109 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5110 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5111 gimple *mul_stmt, *pow_stmt;
5113 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5114 target. */
5115 if (!powi_fndecl)
5116 return NULL_TREE;
5118 /* Allocate the repeated factor vector. */
5119 repeat_factor_vec.create (10);
5121 /* Scan the OPS vector for all SSA names in the product and build
5122 up a vector of occurrence counts for each factor. */
5123 FOR_EACH_VEC_ELT (*ops, i, oe)
5125 if (TREE_CODE (oe->op) == SSA_NAME)
5127 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5129 if (rf1->factor == oe->op)
5131 rf1->count += oe->count;
5132 break;
5136 if (j >= repeat_factor_vec.length ())
5138 rfnew.factor = oe->op;
5139 rfnew.rank = oe->rank;
5140 rfnew.count = oe->count;
5141 rfnew.repr = NULL_TREE;
5142 repeat_factor_vec.safe_push (rfnew);
5147 /* Sort the repeated factor vector by (a) increasing occurrence count,
5148 and (b) decreasing rank. */
5149 repeat_factor_vec.qsort (compare_repeat_factors);
5151 /* It is generally best to combine as many base factors as possible
5152 into a product before applying __builtin_powi to the result.
5153 However, the sort order chosen for the repeated factor vector
5154 allows us to cache partial results for the product of the base
5155 factors for subsequent use. When we already have a cached partial
5156 result from a previous iteration, it is best to make use of it
5157 before looking for another __builtin_pow opportunity.
5159 As an example, consider x * x * y * y * y * z * z * z * z.
5160 We want to first compose the product x * y * z, raise it to the
5161 second power, then multiply this by y * z, and finally multiply
5162 by z. This can be done in 5 multiplies provided we cache y * z
5163 for use in both expressions:
5165 t1 = y * z
5166 t2 = t1 * x
5167 t3 = t2 * t2
5168 t4 = t1 * t3
5169 result = t4 * z
5171 If we instead ignored the cached y * z and first multiplied by
5172 the __builtin_pow opportunity z * z, we would get the inferior:
5174 t1 = y * z
5175 t2 = t1 * x
5176 t3 = t2 * t2
5177 t4 = z * z
5178 t5 = t3 * t4
5179 result = t5 * y */
5181 vec_len = repeat_factor_vec.length ();
5183 /* Repeatedly look for opportunities to create a builtin_powi call. */
5184 while (true)
5186 HOST_WIDE_INT power;
5188 /* First look for the largest cached product of factors from
5189 preceding iterations. If found, create a builtin_powi for
5190 it if the minimum occurrence count for its factors is at
5191 least 2, or just use this cached product as our next
5192 multiplicand if the minimum occurrence count is 1. */
5193 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5195 if (rf1->repr && rf1->count > 0)
5196 break;
5199 if (j < vec_len)
5201 power = rf1->count;
5203 if (power == 1)
5205 iter_result = rf1->repr;
5207 if (dump_file && (dump_flags & TDF_DETAILS))
5209 unsigned elt;
5210 repeat_factor *rf;
5211 fputs ("Multiplying by cached product ", dump_file);
5212 for (elt = j; elt < vec_len; elt++)
5214 rf = &repeat_factor_vec[elt];
5215 print_generic_expr (dump_file, rf->factor, 0);
5216 if (elt < vec_len - 1)
5217 fputs (" * ", dump_file);
5219 fputs ("\n", dump_file);
5222 else
5224 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5225 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5226 build_int_cst (integer_type_node,
5227 power));
5228 gimple_call_set_lhs (pow_stmt, iter_result);
5229 gimple_set_location (pow_stmt, gimple_location (stmt));
5230 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5231 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5233 if (dump_file && (dump_flags & TDF_DETAILS))
5235 unsigned elt;
5236 repeat_factor *rf;
5237 fputs ("Building __builtin_pow call for cached product (",
5238 dump_file);
5239 for (elt = j; elt < vec_len; elt++)
5241 rf = &repeat_factor_vec[elt];
5242 print_generic_expr (dump_file, rf->factor, 0);
5243 if (elt < vec_len - 1)
5244 fputs (" * ", dump_file);
5246 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5247 power);
5251 else
5253 /* Otherwise, find the first factor in the repeated factor
5254 vector whose occurrence count is at least 2. If no such
5255 factor exists, there are no builtin_powi opportunities
5256 remaining. */
5257 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5259 if (rf1->count >= 2)
5260 break;
5263 if (j >= vec_len)
5264 break;
5266 power = rf1->count;
5268 if (dump_file && (dump_flags & TDF_DETAILS))
5270 unsigned elt;
5271 repeat_factor *rf;
5272 fputs ("Building __builtin_pow call for (", dump_file);
5273 for (elt = j; elt < vec_len; elt++)
5275 rf = &repeat_factor_vec[elt];
5276 print_generic_expr (dump_file, rf->factor, 0);
5277 if (elt < vec_len - 1)
5278 fputs (" * ", dump_file);
5280 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5283 reassociate_stats.pows_created++;
5285 /* Visit each element of the vector in reverse order (so that
5286 high-occurrence elements are visited first, and within the
5287 same occurrence count, lower-ranked elements are visited
5288 first). Form a linear product of all elements in this order
5289 whose occurrence count is at least that of element J.
5290 Record the SSA name representing the product of each element
5291 with all subsequent elements in the vector. */
5292 if (j == vec_len - 1)
5293 rf1->repr = rf1->factor;
5294 else
5296 for (ii = vec_len - 2; ii >= (int)j; ii--)
5298 tree op1, op2;
5300 rf1 = &repeat_factor_vec[ii];
5301 rf2 = &repeat_factor_vec[ii + 1];
5303 /* Init the last factor's representative to be itself. */
5304 if (!rf2->repr)
5305 rf2->repr = rf2->factor;
5307 op1 = rf1->factor;
5308 op2 = rf2->repr;
5310 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5311 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5312 op1, op2);
5313 gimple_set_location (mul_stmt, gimple_location (stmt));
5314 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5315 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5316 rf1->repr = target_ssa;
5318 /* Don't reprocess the multiply we just introduced. */
5319 gimple_set_visited (mul_stmt, true);
5323 /* Form a call to __builtin_powi for the maximum product
5324 just formed, raised to the power obtained earlier. */
5325 rf1 = &repeat_factor_vec[j];
5326 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5327 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5328 build_int_cst (integer_type_node,
5329 power));
5330 gimple_call_set_lhs (pow_stmt, iter_result);
5331 gimple_set_location (pow_stmt, gimple_location (stmt));
5332 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5333 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5336 /* If we previously formed at least one other builtin_powi call,
5337 form the product of this one and those others. */
5338 if (result)
5340 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5341 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5342 result, iter_result);
5343 gimple_set_location (mul_stmt, gimple_location (stmt));
5344 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5345 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5346 gimple_set_visited (mul_stmt, true);
5347 result = new_result;
5349 else
5350 result = iter_result;
5352 /* Decrement the occurrence count of each element in the product
5353 by the count found above, and remove this many copies of each
5354 factor from OPS. */
5355 for (i = j; i < vec_len; i++)
5357 unsigned k = power;
5358 unsigned n;
5360 rf1 = &repeat_factor_vec[i];
5361 rf1->count -= power;
5363 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5365 if (oe->op == rf1->factor)
5367 if (oe->count <= k)
5369 ops->ordered_remove (n);
5370 k -= oe->count;
5372 if (k == 0)
5373 break;
5375 else
5377 oe->count -= k;
5378 break;
5385 /* At this point all elements in the repeated factor vector have a
5386 remaining occurrence count of 0 or 1, and those with a count of 1
5387 don't have cached representatives. Re-sort the ops vector and
5388 clean up. */
5389 ops->qsort (sort_by_operand_rank);
5390 repeat_factor_vec.release ();
5392 /* Return the final product computed herein. Note that there may
5393 still be some elements with single occurrence count left in OPS;
5394 those will be handled by the normal reassociation logic. */
5395 return result;
5398 /* Attempt to optimize
5399 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5400 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
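/* E.g. 2.0 * copysign (3.0, y_1) becomes copysign (6.0, y_1), and
-2.5 * copysign (3.0, y_1) becomes -copysign (-7.5, y_1)
(illustrative names; the multiplication of the constants must fold
exactly, see below). */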
5402 static void
5403 attempt_builtin_copysign (vec<operand_entry *> *ops)
5405 operand_entry *oe;
5406 unsigned int i;
5407 unsigned int length = ops->length ();
5408 tree cst = ops->last ()->op;
5410 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5411 return;
5413 FOR_EACH_VEC_ELT (*ops, i, oe)
5415 if (TREE_CODE (oe->op) == SSA_NAME
5416 && has_single_use (oe->op))
5418 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5419 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5421 tree arg0, arg1;
5422 switch (gimple_call_combined_fn (old_call))
5424 CASE_CFN_COPYSIGN:
5425 arg0 = gimple_call_arg (old_call, 0);
5426 arg1 = gimple_call_arg (old_call, 1);
5427 /* The first argument of copysign must be a constant,
5428 otherwise there's nothing to do. */
5429 if (TREE_CODE (arg0) == REAL_CST)
5431 tree type = TREE_TYPE (arg0);
5432 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5433 /* If we couldn't fold to a single constant, skip it.
5434 That happens e.g. for inexact multiplication when
5435 -frounding-math. */
5436 if (mul == NULL_TREE)
5437 break;
5438 /* Instead of adjusting OLD_CALL, let's build a new
5439 call to not leak the LHS and prevent keeping bogus
5440 debug statements. DCE will clean up the old call. */
5441 gcall *new_call;
5442 if (gimple_call_internal_p (old_call))
5443 new_call = gimple_build_call_internal
5444 (IFN_COPYSIGN, 2, mul, arg1);
5445 else
5446 new_call = gimple_build_call
5447 (gimple_call_fndecl (old_call), 2, mul, arg1);
5448 tree lhs = make_ssa_name (type);
5449 gimple_call_set_lhs (new_call, lhs);
5450 gimple_set_location (new_call,
5451 gimple_location (old_call));
5452 insert_stmt_after (new_call, old_call);
5453 /* We've used the constant, get rid of it. */
5454 ops->pop ();
5455 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5456 /* Handle the CST1 < 0 case by negating the result. */
5457 if (cst1_neg)
5459 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5460 gimple *negate_stmt
5461 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5462 insert_stmt_after (negate_stmt, new_call);
5463 oe->op = negrhs;
5465 else
5466 oe->op = lhs;
5467 if (dump_file && (dump_flags & TDF_DETAILS))
5469 fprintf (dump_file, "Optimizing copysign: ");
5470 print_generic_expr (dump_file, cst, 0);
5471 fprintf (dump_file, " * COPYSIGN (");
5472 print_generic_expr (dump_file, arg0, 0);
5473 fprintf (dump_file, ", ");
5474 print_generic_expr (dump_file, arg1, 0);
5475 fprintf (dump_file, ") into %sCOPYSIGN (",
5476 cst1_neg ? "-" : "");
5477 print_generic_expr (dump_file, mul, 0);
5478 fprintf (dump_file, ", ");
5479 print_generic_expr (dump_file, arg1, 0);
5480 fprintf (dump_file, ")\n");
5482 return;
5484 break;
5485 default:
5486 break;
5493 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5495 static void
5496 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5498 tree rhs1;
5500 if (dump_file && (dump_flags & TDF_DETAILS))
5502 fprintf (dump_file, "Transforming ");
5503 print_gimple_stmt (dump_file, stmt, 0, 0);
5506 rhs1 = gimple_assign_rhs1 (stmt);
5507 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5508 update_stmt (stmt);
5509 remove_visited_stmt_chain (rhs1);
5511 if (dump_file && (dump_flags & TDF_DETAILS))
5513 fprintf (dump_file, " into ");
5514 print_gimple_stmt (dump_file, stmt, 0, 0);
5518 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5520 static void
5521 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5522 tree rhs1, tree rhs2)
5524 if (dump_file && (dump_flags & TDF_DETAILS))
5526 fprintf (dump_file, "Transforming ");
5527 print_gimple_stmt (dump_file, stmt, 0, 0);
5530 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5531 update_stmt (gsi_stmt (*gsi));
5532 remove_visited_stmt_chain (rhs1);
5534 if (dump_file && (dump_flags & TDF_DETAILS))
5536 fprintf (dump_file, " into ");
5537 print_gimple_stmt (dump_file, stmt, 0, 0);
5541 /* Reassociate expressions in basic block BB and its children in the
5542 post-dominator tree.
5544 Bubble up return status from maybe_optimize_range_tests. */
5546 static bool
5547 reassociate_bb (basic_block bb)
5549 gimple_stmt_iterator gsi;
5550 basic_block son;
5551 gimple *stmt = last_stmt (bb);
5552 bool cfg_cleanup_needed = false;
5554 if (stmt && !gimple_visited_p (stmt))
5555 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5557 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5559 stmt = gsi_stmt (gsi);
5561 if (is_gimple_assign (stmt)
5562 && !stmt_could_throw_p (stmt))
5564 tree lhs, rhs1, rhs2;
5565 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5567 /* If this is not a gimple binary expression, there is
5568 nothing for us to do with it. */
5569 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5570 continue;
5572 /* If this was part of an already processed statement,
5573 we don't need to touch it again. */
5574 if (gimple_visited_p (stmt))
5576 /* This statement might have become dead because of previous
5577 reassociations. */
5578 if (has_zero_uses (gimple_get_lhs (stmt)))
5580 reassoc_remove_stmt (&gsi);
5581 release_defs (stmt);
5582 /* We might end up removing the last stmt above which
5583 places the iterator to the end of the sequence.
5584 Reset it to the last stmt in this case which might
5585 be the end of the sequence as well if we removed
5586 the last statement of the sequence. In which case
5587 we need to bail out. */
5588 if (gsi_end_p (gsi))
5590 gsi = gsi_last_bb (bb);
5591 if (gsi_end_p (gsi))
5592 break;
5595 continue;
5598 lhs = gimple_assign_lhs (stmt);
5599 rhs1 = gimple_assign_rhs1 (stmt);
5600 rhs2 = gimple_assign_rhs2 (stmt);
5602 /* For non-bit or min/max operations we can't associate
5603 all types. Verify that here. */
5604 if (rhs_code != BIT_IOR_EXPR
5605 && rhs_code != BIT_AND_EXPR
5606 && rhs_code != BIT_XOR_EXPR
5607 && rhs_code != MIN_EXPR
5608 && rhs_code != MAX_EXPR
5609 && (!can_reassociate_p (lhs)
5610 || !can_reassociate_p (rhs1)
5611 || !can_reassociate_p (rhs2)))
5612 continue;
5614 if (associative_tree_code (rhs_code))
5616 auto_vec<operand_entry *> ops;
5617 tree powi_result = NULL_TREE;
5618 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5620 /* There may be no immediate uses left by the time we
5621 get here because we may have eliminated them all. */
5622 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5623 continue;
5625 gimple_set_visited (stmt, true);
5626 linearize_expr_tree (&ops, stmt, true, true);
5627 ops.qsort (sort_by_operand_rank);
5628 optimize_ops_list (rhs_code, &ops);
5629 if (undistribute_ops_list (rhs_code, &ops,
5630 loop_containing_stmt (stmt)))
5632 ops.qsort (sort_by_operand_rank);
5633 optimize_ops_list (rhs_code, &ops);
5636 if (rhs_code == PLUS_EXPR
5637 && transform_add_to_multiply (&ops))
5638 ops.qsort (sort_by_operand_rank);
5640 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5642 if (is_vector)
5643 optimize_vec_cond_expr (rhs_code, &ops);
5644 else
5645 optimize_range_tests (rhs_code, &ops);
5648 if (rhs_code == MULT_EXPR && !is_vector)
5650 attempt_builtin_copysign (&ops);
5652 if (reassoc_insert_powi_p
5653 && flag_unsafe_math_optimizations)
5654 powi_result = attempt_builtin_powi (stmt, &ops);
5657 operand_entry *last;
5658 bool negate_result = false;
5659 if (ops.length () > 1
5660 && rhs_code == MULT_EXPR)
5662 last = ops.last ();
5663 if ((integer_minus_onep (last->op)
5664 || real_minus_onep (last->op))
5665 && !HONOR_SNANS (TREE_TYPE (lhs))
5666 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5667 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5669 ops.pop ();
5670 negate_result = true;
5674 tree new_lhs = lhs;
5675 /* If the operand vector is now empty, all operands were
5676 consumed by the __builtin_powi optimization. */
5677 if (ops.length () == 0)
5678 transform_stmt_to_copy (&gsi, stmt, powi_result);
5679 else if (ops.length () == 1)
5681 tree last_op = ops.last ()->op;
5683 /* If the stmt that defines operand has to be inserted, insert it
5684 before the use. */
5685 if (ops.last ()->stmt_to_insert)
5686 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5687 if (powi_result)
5688 transform_stmt_to_multiply (&gsi, stmt, last_op,
5689 powi_result);
5690 else
5691 transform_stmt_to_copy (&gsi, stmt, last_op);
5693 else
5695 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5696 int ops_num = ops.length ();
5697 int width = get_reassociation_width (ops_num, rhs_code, mode);
5699 if (dump_file && (dump_flags & TDF_DETAILS))
5700 fprintf (dump_file,
5701 "Width = %d was chosen for reassociation\n", width);
5703 if (width > 1
5704 && ops.length () > 3)
5705 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5706 width, ops);
5707 else
5709 /* When there are three operands left, we want
5710 to make sure the ones that get the double
5711 binary op are chosen wisely. */
5712 int len = ops.length ();
5713 if (len >= 3)
5714 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5716 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5717 powi_result != NULL
5718 || negate_result);
5721 /* If we combined some repeated factors into a
5722 __builtin_powi call, multiply that result by the
5723 reassociated operands. */
5724 if (powi_result)
5726 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5727 tree type = TREE_TYPE (lhs);
5728 tree target_ssa = make_temp_ssa_name (type, NULL,
5729 "reassocpow");
5730 gimple_set_lhs (lhs_stmt, target_ssa);
5731 update_stmt (lhs_stmt);
5732 if (lhs != new_lhs)
5734 target_ssa = new_lhs;
5735 new_lhs = lhs;
5737 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5738 powi_result, target_ssa);
5739 gimple_set_location (mul_stmt, gimple_location (stmt));
5740 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5741 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5745 if (negate_result)
5747 stmt = SSA_NAME_DEF_STMT (lhs);
5748 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5749 gimple_set_lhs (stmt, tmp);
5750 if (lhs != new_lhs)
5751 tmp = new_lhs;
5752 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5753 tmp);
5754 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5755 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5756 update_stmt (stmt);
5761 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5762 son;
5763 son = next_dom_son (CDI_POST_DOMINATORS, son))
5764 cfg_cleanup_needed |= reassociate_bb (son);
5766 return cfg_cleanup_needed;
5769 /* Add jumps around shifts for range tests turned into bit tests.
5770 For each SSA_NAME VAR we have code like:
5771 VAR = ...; // final stmt of range comparison
5772 // bit test here...;
5773 OTHERVAR = ...; // final stmt of the bit test sequence
5774 RES = VAR | OTHERVAR;
5775 Turn the above into:
5776 VAR = ...;
5777 if (VAR != 0)
5778 goto <l3>;
5779 else
5780 goto <l2>;
5781 <l2>:
5782 // bit test here...;
5783 OTHERVAR = ...;
5784 <l3>:
5785 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5787 static void
5788 branch_fixup (void)
5790 tree var;
5791 unsigned int i;
5793 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5795 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5796 gimple *use_stmt;
5797 use_operand_p use;
5798 bool ok = single_imm_use (var, &use, &use_stmt);
5799 gcc_assert (ok
5800 && is_gimple_assign (use_stmt)
5801 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5802 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5804 basic_block cond_bb = gimple_bb (def_stmt);
5805 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5806 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5808 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5809 gimple *g = gimple_build_cond (NE_EXPR, var,
5810 build_zero_cst (TREE_TYPE (var)),
5811 NULL_TREE, NULL_TREE);
5812 location_t loc = gimple_location (use_stmt);
5813 gimple_set_location (g, loc);
5814 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5816 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5817 etrue->probability = REG_BR_PROB_BASE / 2;
5818 etrue->count = cond_bb->count / 2;
5819 edge efalse = find_edge (cond_bb, then_bb);
5820 efalse->flags = EDGE_FALSE_VALUE;
5821 efalse->probability -= etrue->probability;
5822 efalse->count -= etrue->count;
5823 then_bb->count -= etrue->count;
5825 tree othervar = NULL_TREE;
5826 if (gimple_assign_rhs1 (use_stmt) == var)
5827 othervar = gimple_assign_rhs2 (use_stmt);
5828 else if (gimple_assign_rhs2 (use_stmt) == var)
5829 othervar = gimple_assign_rhs1 (use_stmt);
5830 else
5831 gcc_unreachable ();
5832 tree lhs = gimple_assign_lhs (use_stmt);
5833 gphi *phi = create_phi_node (lhs, merge_bb);
5834 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5835 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5836 gsi = gsi_for_stmt (use_stmt);
5837 gsi_remove (&gsi, true);
5839 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5840 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5842 reassoc_branch_fixups.release ();
5845 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5846 void debug_ops_vector (vec<operand_entry *> ops);
5848 /* Dump the operand entry vector OPS to FILE. */
5850 void
5851 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5853 operand_entry *oe;
5854 unsigned int i;
5856 FOR_EACH_VEC_ELT (ops, i, oe)
5858 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5859 print_generic_expr (file, oe->op, 0);
5860 fprintf (file, "\n");
5864 /* Dump the operand entry vector OPS to STDERR. */
5866 DEBUG_FUNCTION void
5867 debug_ops_vector (vec<operand_entry *> ops)
5869 dump_ops_vector (stderr, ops);
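/* Both helpers are meant for interactive use; DEBUG_FUNCTION routines
   like debug_ops_vector are typically called by hand from the debugger
   while stepping through this pass, e.g. (output below is illustrative
   only):

     (gdb) call debug_ops_vector (ops)
     Op 0 -> rank: 4, tree: a_3
     Op 1 -> rank: 5, tree: _7

   dump_ops_vector can likewise be given dump_file when the pass runs
   with one of the -fdump-tree-reassoc* options.  */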
5872 /* Bubble up return status from reassociate_bb. */
5874 static bool
5875 do_reassoc (void)
5877 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5878 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5881 /* Initialize the reassociation pass. */
5883 static void
5884 init_reassoc (void)
5886 int i;
5887 long rank = 2;
5888 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5890 /* Find the loops, so that we can prevent moving calculations in
5891 them. */
5892 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5894 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5896 next_operand_entry_id = 0;
5898 /* Reverse post order (RPO) will give us an ordering in which
5899 deeper loops come later. */
5900 pre_and_rev_post_order_compute (NULL, bbs, false);
5901 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5902 operand_rank = new hash_map<tree, long>;
5904 /* Give each default definition a distinct rank. This includes
5905 parameters and the static chain. Walk backwards over all
5906 SSA names so that we get proper rank ordering according
5907 to tree_swap_operands_p. */
5908 for (i = num_ssa_names - 1; i > 0; --i)
5910 tree name = ssa_name (i);
5911 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5912 insert_operand_rank (name, ++rank);
5915 /* Set up rank for each BB. */
5916 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5917 bb_rank[bbs[i]] = ++rank << 16;
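/* A small worked example (numbers are illustrative only): RANK starts at
   2, so if the function has two default definitions they receive ranks 3
   and 4, and the basic blocks then get the bases 5 << 16, 6 << 16, ... in
   the order computed above.  Spacing the per-block bases 65536 apart
   leaves ample room for the small +1 increments that get_rank (earlier
   in this file) adds per defining statement, so ranks derived from one
   block's base stay below the next block's base.  */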
5919 free (bbs);
5920 calculate_dominance_info (CDI_POST_DOMINATORS);
5921 plus_negates = vNULL;
5924 /* Cleanup after the reassociation pass, and print stats if
5925 requested. */
5927 static void
5928 fini_reassoc (void)
5930 statistics_counter_event (cfun, "Linearized",
5931 reassociate_stats.linearized);
5932 statistics_counter_event (cfun, "Constants eliminated",
5933 reassociate_stats.constants_eliminated);
5934 statistics_counter_event (cfun, "Ops eliminated",
5935 reassociate_stats.ops_eliminated);
5936 statistics_counter_event (cfun, "Statements rewritten",
5937 reassociate_stats.rewritten);
5938 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5939 reassociate_stats.pows_encountered);
5940 statistics_counter_event (cfun, "Built-in powi calls created",
5941 reassociate_stats.pows_created);
5943 delete operand_rank;
5944 operand_entry_pool.release ();
5945 free (bb_rank);
5946 plus_negates.release ();
5947 free_dominance_info (CDI_POST_DOMINATORS);
5948 loop_optimizer_finalize ();
5951 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5952 insertion of __builtin_powi calls.
5954 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5955 optimization of a gimple conditional. Otherwise returns zero. */
5957 static unsigned int
5958 execute_reassoc (bool insert_powi_p)
5960 reassoc_insert_powi_p = insert_powi_p;
5962 init_reassoc ();
5964 bool cfg_cleanup_needed;
5965 cfg_cleanup_needed = do_reassoc ();
5966 repropagate_negates ();
5967 branch_fixup ();
5969 fini_reassoc ();
5970 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5973 namespace {
5975 const pass_data pass_data_reassoc =
5977 GIMPLE_PASS, /* type */
5978 "reassoc", /* name */
5979 OPTGROUP_NONE, /* optinfo_flags */
5980 TV_TREE_REASSOC, /* tv_id */
5981 ( PROP_cfg | PROP_ssa ), /* properties_required */
5982 0, /* properties_provided */
5983 0, /* properties_destroyed */
5984 0, /* todo_flags_start */
5985 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5988 class pass_reassoc : public gimple_opt_pass
5990 public:
5991 pass_reassoc (gcc::context *ctxt)
5992 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5995 /* opt_pass methods: */
5996 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5997 void set_pass_param (unsigned int n, bool param)
5999 gcc_assert (n == 0);
6000 insert_powi_p = param;
6002 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6003 virtual unsigned int execute (function *)
6004 { return execute_reassoc (insert_powi_p); }
6006 private:
6007 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6008 point 3a in the pass header comment. */
6009 bool insert_powi_p;
6010 }; // class pass_reassoc
6012 } // anon namespace
6014 gimple_opt_pass *
6015 make_pass_reassoc (gcc::context *ctxt)
6017 return new pass_reassoc (ctxt);
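/* The insert_powi_p flag is supplied from gcc/passes.def through
   set_pass_param; roughly (exact spelling in passes.def may differ), the
   pass is instantiated twice along the lines of

     NEXT_PASS_WITH_ARG (pass_reassoc, true);
     ...
     NEXT_PASS_WITH_ARG (pass_reassoc, false);

   so the two pipeline instances differ only in whether __builtin_powi
   calls may be introduced (point 3a of the file header comment).  */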