gcc/ChangeLog:
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob561acea4dcc14d6bc25e0c6812ffdefdd630aa14
1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218 all basic blocks the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advatage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied//whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id > oea->id ? 1 : -1;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if (oeb->rank == oea->rank
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
518 versions of removed SSA_NAMEs, so if possible, prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
547 else
548 return oeb->id > oea->id ? 1 : -1;
551 if (oeb->rank != oea->rank)
552 return oeb->rank > oea->rank ? 1 : -1;
553 else
554 return oeb->id > oea->id ? 1 : -1;
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
609 tree rhs1 = gimple_assign_rhs1 (stmt);
610 tree rhs2 = gimple_assign_rhs1 (stmt);
611 if (TREE_CODE (rhs1) == SSA_NAME
612 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
613 return false;
614 if (rhs2
615 && TREE_CODE (rhs2) == SSA_NAME
616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
617 return false;
618 return true;
621 return false;
625 /* Return true if STMT is a nop-conversion. */
627 static bool
628 gimple_nop_conversion_p (gimple *stmt)
630 if (gassign *ass = dyn_cast <gassign *> (stmt))
632 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
633 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
634 TREE_TYPE (gimple_assign_rhs1 (ass))))
635 return true;
637 return false;
640 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
641 operand of the negate operation. Otherwise, return NULL. */
643 static tree
644 get_unary_op (tree name, enum tree_code opcode)
646 gimple *stmt = SSA_NAME_DEF_STMT (name);
648 /* Look through nop conversions (sign changes). */
649 if (gimple_nop_conversion_p (stmt)
650 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
651 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
653 if (!is_gimple_assign (stmt))
654 return NULL_TREE;
656 if (gimple_assign_rhs_code (stmt) == opcode)
657 return gimple_assign_rhs1 (stmt);
658 return NULL_TREE;
661 /* Return true if OP1 and OP2 have the same value if casted to either type. */
663 static bool
664 ops_equal_values_p (tree op1, tree op2)
666 if (op1 == op2)
667 return true;
669 tree orig_op1 = op1;
670 if (TREE_CODE (op1) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op1);
673 if (gimple_nop_conversion_p (stmt))
675 op1 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2)
677 return true;
681 if (TREE_CODE (op2) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op2);
684 if (gimple_nop_conversion_p (stmt))
686 op2 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2
688 || orig_op1 == op2)
689 return true;
693 return false;
697 /* If CURR and LAST are a pair of ops that OPCODE allows us to
698 eliminate through equivalences, do so, remove them from OPS, and
699 return true. Otherwise, return false. */
701 static bool
702 eliminate_duplicate_pair (enum tree_code opcode,
703 vec<operand_entry *> *ops,
704 bool *all_done,
705 unsigned int i,
706 operand_entry *curr,
707 operand_entry *last)
710 /* If we have two of the same op, and the opcode is & |, min, or max,
711 we can eliminate one of them.
712 If we have two of the same op, and the opcode is ^, we can
713 eliminate both of them. */
715 if (last && last->op == curr->op)
717 switch (opcode)
719 case MAX_EXPR:
720 case MIN_EXPR:
721 case BIT_IOR_EXPR:
722 case BIT_AND_EXPR:
723 if (dump_file && (dump_flags & TDF_DETAILS))
725 fprintf (dump_file, "Equivalence: ");
726 print_generic_expr (dump_file, curr->op);
727 fprintf (dump_file, " [&|minmax] ");
728 print_generic_expr (dump_file, last->op);
729 fprintf (dump_file, " -> ");
730 print_generic_stmt (dump_file, last->op);
733 ops->ordered_remove (i);
734 reassociate_stats.ops_eliminated ++;
736 return true;
738 case BIT_XOR_EXPR:
739 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "Equivalence: ");
742 print_generic_expr (dump_file, curr->op);
743 fprintf (dump_file, " ^ ");
744 print_generic_expr (dump_file, last->op);
745 fprintf (dump_file, " -> nothing\n");
748 reassociate_stats.ops_eliminated += 2;
750 if (ops->length () == 2)
752 ops->truncate (0);
753 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
754 *all_done = true;
756 else
758 ops->ordered_remove (i-1);
759 ops->ordered_remove (i-1);
762 return true;
764 default:
765 break;
768 return false;
771 static vec<tree> plus_negates;
773 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
774 expression, look in OPS for a corresponding positive operation to cancel
775 it out. If we find one, remove the other from OPS, replace
776 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
777 return false. */
779 static bool
780 eliminate_plus_minus_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 unsigned int currindex,
783 operand_entry *curr)
785 tree negateop;
786 tree notop;
787 unsigned int i;
788 operand_entry *oe;
790 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
791 return false;
793 negateop = get_unary_op (curr->op, NEGATE_EXPR);
794 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
795 if (negateop == NULL_TREE && notop == NULL_TREE)
796 return false;
798 /* Any non-negated version will have a rank that is one less than
799 the current rank. So once we hit those ranks, if we don't find
800 one, we can stop. */
802 for (i = currindex + 1;
803 ops->iterate (i, &oe)
804 && oe->rank >= curr->rank - 1 ;
805 i++)
807 if (negateop
808 && ops_equal_values_p (oe->op, negateop))
810 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Equivalence: ");
813 print_generic_expr (dump_file, negateop);
814 fprintf (dump_file, " + -");
815 print_generic_expr (dump_file, oe->op);
816 fprintf (dump_file, " -> 0\n");
819 ops->ordered_remove (i);
820 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
821 ops->ordered_remove (currindex);
822 reassociate_stats.ops_eliminated ++;
824 return true;
826 else if (notop
827 && ops_equal_values_p (oe->op, notop))
829 tree op_type = TREE_TYPE (oe->op);
831 if (dump_file && (dump_flags & TDF_DETAILS))
833 fprintf (dump_file, "Equivalence: ");
834 print_generic_expr (dump_file, notop);
835 fprintf (dump_file, " + ~");
836 print_generic_expr (dump_file, oe->op);
837 fprintf (dump_file, " -> -1\n");
840 ops->ordered_remove (i);
841 add_to_ops_vec (ops, build_all_ones_cst (op_type));
842 ops->ordered_remove (currindex);
843 reassociate_stats.ops_eliminated ++;
845 return true;
849 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
850 save it for later inspection in repropagate_negates(). */
851 if (negateop != NULL_TREE
852 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
853 plus_negates.safe_push (curr->op);
855 return false;
858 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
859 bitwise not expression, look in OPS for a corresponding operand to
860 cancel it out. If we find one, remove the other from OPS, replace
861 OPS[CURRINDEX] with 0, and return true. Otherwise, return
862 false. */
864 static bool
865 eliminate_not_pairs (enum tree_code opcode,
866 vec<operand_entry *> *ops,
867 unsigned int currindex,
868 operand_entry *curr)
870 tree notop;
871 unsigned int i;
872 operand_entry *oe;
874 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
875 || TREE_CODE (curr->op) != SSA_NAME)
876 return false;
878 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
879 if (notop == NULL_TREE)
880 return false;
882 /* Any non-not version will have a rank that is one less than
883 the current rank. So once we hit those ranks, if we don't find
884 one, we can stop. */
886 for (i = currindex + 1;
887 ops->iterate (i, &oe)
888 && oe->rank >= curr->rank - 1;
889 i++)
891 if (oe->op == notop)
893 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Equivalence: ");
896 print_generic_expr (dump_file, notop);
897 if (opcode == BIT_AND_EXPR)
898 fprintf (dump_file, " & ~");
899 else if (opcode == BIT_IOR_EXPR)
900 fprintf (dump_file, " | ~");
901 print_generic_expr (dump_file, oe->op);
902 if (opcode == BIT_AND_EXPR)
903 fprintf (dump_file, " -> 0\n");
904 else if (opcode == BIT_IOR_EXPR)
905 fprintf (dump_file, " -> -1\n");
908 if (opcode == BIT_AND_EXPR)
909 oe->op = build_zero_cst (TREE_TYPE (oe->op));
910 else if (opcode == BIT_IOR_EXPR)
911 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
913 reassociate_stats.ops_eliminated += ops->length () - 1;
914 ops->truncate (0);
915 ops->quick_push (oe);
916 return true;
920 return false;
923 /* Use constant value that may be present in OPS to try to eliminate
924 operands. Note that this function is only really used when we've
925 eliminated ops for other reasons, or merged constants. Across
926 single statements, fold already does all of this, plus more. There
927 is little point in duplicating logic, so I've only included the
928 identities that I could ever construct testcases to trigger. */
930 static void
931 eliminate_using_constants (enum tree_code opcode,
932 vec<operand_entry *> *ops)
934 operand_entry *oelast = ops->last ();
935 tree type = TREE_TYPE (oelast->op);
937 if (oelast->rank == 0
938 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
940 switch (opcode)
942 case BIT_AND_EXPR:
943 if (integer_zerop (oelast->op))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found & 0, removing all other ops\n");
950 reassociate_stats.ops_eliminated += ops->length () - 1;
952 ops->truncate (0);
953 ops->quick_push (oelast);
954 return;
957 else if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found & -1, removing\n");
963 ops->pop ();
964 reassociate_stats.ops_eliminated++;
967 break;
968 case BIT_IOR_EXPR:
969 if (integer_all_onesp (oelast->op))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found | -1, removing all other ops\n");
976 reassociate_stats.ops_eliminated += ops->length () - 1;
978 ops->truncate (0);
979 ops->quick_push (oelast);
980 return;
983 else if (integer_zerop (oelast->op))
985 if (ops->length () != 1)
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found | 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
993 break;
994 case MULT_EXPR:
995 if (integer_zerop (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_NANS (type)
998 && !HONOR_SIGNED_ZEROS (type)
999 && real_zerop (oelast->op)))
1001 if (ops->length () != 1)
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "Found * 0, removing all other ops\n");
1006 reassociate_stats.ops_eliminated += ops->length () - 1;
1007 ops->truncate (1);
1008 ops->quick_push (oelast);
1009 return;
1012 else if (integer_onep (oelast->op)
1013 || (FLOAT_TYPE_P (type)
1014 && !HONOR_SNANS (type)
1015 && real_onep (oelast->op)))
1017 if (ops->length () != 1)
1019 if (dump_file && (dump_flags & TDF_DETAILS))
1020 fprintf (dump_file, "Found * 1, removing\n");
1021 ops->pop ();
1022 reassociate_stats.ops_eliminated++;
1023 return;
1026 break;
1027 case BIT_XOR_EXPR:
1028 case PLUS_EXPR:
1029 case MINUS_EXPR:
1030 if (integer_zerop (oelast->op)
1031 || (FLOAT_TYPE_P (type)
1032 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1033 && fold_real_zero_addition_p (type, oelast->op,
1034 opcode == MINUS_EXPR)))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found [|^+] 0, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1042 return;
1045 break;
1046 default:
1047 break;
1053 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1054 bool, bool);
1056 /* Structure for tracking and counting operands. */
1057 struct oecount {
1058 unsigned int cnt;
1059 unsigned int id;
1060 enum tree_code oecode;
1061 tree op;
1065 /* The heap for the oecount hashtable and the sorted list of operands. */
1066 static vec<oecount> cvec;
1069 /* Oecount hashtable helpers. */
1071 struct oecount_hasher : int_hash <int, 0, 1>
1073 static inline hashval_t hash (int);
1074 static inline bool equal (int, int);
1077 /* Hash function for oecount. */
1079 inline hashval_t
1080 oecount_hasher::hash (int p)
1082 const oecount *c = &cvec[p - 42];
1083 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1086 /* Comparison function for oecount. */
1088 inline bool
1089 oecount_hasher::equal (int p1, int p2)
1091 const oecount *c1 = &cvec[p1 - 42];
1092 const oecount *c2 = &cvec[p2 - 42];
1093 return c1->oecode == c2->oecode && c1->op == c2->op;
1096 /* Comparison function for qsort sorting oecount elements by count. */
1098 static int
1099 oecount_cmp (const void *p1, const void *p2)
1101 const oecount *c1 = (const oecount *)p1;
1102 const oecount *c2 = (const oecount *)p2;
1103 if (c1->cnt != c2->cnt)
1104 return c1->cnt > c2->cnt ? 1 : -1;
1105 else
1106 /* If counts are identical, use unique IDs to stabilize qsort. */
1107 return c1->id > c2->id ? 1 : -1;
1110 /* Return TRUE iff STMT represents a builtin call that raises OP
1111 to some exponent. */
1113 static bool
1114 stmt_is_power_of_op (gimple *stmt, tree op)
1116 if (!is_gimple_call (stmt))
1117 return false;
1119 switch (gimple_call_combined_fn (stmt))
1121 CASE_CFN_POW:
1122 CASE_CFN_POWI:
1123 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1125 default:
1126 return false;
1130 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1131 in place and return the result. Assumes that stmt_is_power_of_op
1132 was previously called for STMT and returned TRUE. */
1134 static HOST_WIDE_INT
1135 decrement_power (gimple *stmt)
1137 REAL_VALUE_TYPE c, cint;
1138 HOST_WIDE_INT power;
1139 tree arg1;
1141 switch (gimple_call_combined_fn (stmt))
1143 CASE_CFN_POW:
1144 arg1 = gimple_call_arg (stmt, 1);
1145 c = TREE_REAL_CST (arg1);
1146 power = real_to_integer (&c) - 1;
1147 real_from_integer (&cint, VOIDmode, power, SIGNED);
1148 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1149 return power;
1151 CASE_CFN_POWI:
1152 arg1 = gimple_call_arg (stmt, 1);
1153 power = TREE_INT_CST_LOW (arg1) - 1;
1154 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1155 return power;
1157 default:
1158 gcc_unreachable ();
1162 /* Replace SSA defined by STMT and replace all its uses with new
1163 SSA. Also return the new SSA. */
1165 static tree
1166 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1168 gimple *use_stmt;
1169 use_operand_p use;
1170 imm_use_iterator iter;
1171 tree new_lhs, new_debug_lhs = NULL_TREE;
1172 tree lhs = gimple_get_lhs (stmt);
1174 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1175 gimple_set_lhs (stmt, new_lhs);
1177 /* Also need to update GIMPLE_DEBUGs. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1180 tree repl = new_lhs;
1181 if (is_gimple_debug (use_stmt))
1183 if (new_debug_lhs == NULL_TREE)
1185 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1186 gdebug *def_temp
1187 = gimple_build_debug_bind (new_debug_lhs,
1188 build2 (opcode, TREE_TYPE (lhs),
1189 new_lhs, op),
1190 stmt);
1191 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1192 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1193 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1194 gimple_set_uid (def_temp, gimple_uid (stmt));
1195 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1196 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1198 repl = new_debug_lhs;
1200 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1201 SET_USE (use, repl);
1202 update_stmt (use_stmt);
1204 return new_lhs;
1207 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1208 uses with new SSAs. Also do this for the stmt that defines DEF
1209 if *DEF is not OP. */
1211 static void
1212 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1213 vec<gimple *> &stmts_to_fix)
1215 unsigned i;
1216 gimple *stmt;
1218 if (*def != op
1219 && TREE_CODE (*def) == SSA_NAME
1220 && (stmt = SSA_NAME_DEF_STMT (*def))
1221 && gimple_code (stmt) != GIMPLE_NOP)
1222 *def = make_new_ssa_for_def (stmt, opcode, op);
1224 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1225 make_new_ssa_for_def (stmt, opcode, op);
1228 /* Find the single immediate use of STMT's LHS, and replace it
1229 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1230 replace *DEF with OP as well. */
1232 static void
1233 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1235 tree lhs;
1236 gimple *use_stmt;
1237 use_operand_p use;
1238 gimple_stmt_iterator gsi;
1240 if (is_gimple_call (stmt))
1241 lhs = gimple_call_lhs (stmt);
1242 else
1243 lhs = gimple_assign_lhs (stmt);
1245 gcc_assert (has_single_use (lhs));
1246 single_imm_use (lhs, &use, &use_stmt);
1247 if (lhs == *def)
1248 *def = op;
1249 SET_USE (use, op);
1250 if (TREE_CODE (op) != SSA_NAME)
1251 update_stmt (use_stmt);
1252 gsi = gsi_for_stmt (stmt);
1253 unlink_stmt_vdef (stmt);
1254 reassoc_remove_stmt (&gsi);
1255 release_defs (stmt);
1258 /* Walks the linear chain with result *DEF searching for an operation
1259 with operand OP and code OPCODE removing that from the chain. *DEF
1260 is updated if there is only one operand but no operation left. */
1262 static void
1263 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1265 tree orig_def = *def;
1266 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1267 /* PR72835 - Record the stmt chain that has to be updated such that
1268 we dont use the same LHS when the values computed are different. */
1269 auto_vec<gimple *, 64> stmts_to_fix;
1273 tree name;
1275 if (opcode == MULT_EXPR)
1277 if (stmt_is_power_of_op (stmt, op))
1279 if (decrement_power (stmt) == 1)
1281 if (stmts_to_fix.length () > 0)
1282 stmts_to_fix.pop ();
1283 propagate_op_to_single_use (op, stmt, def);
1285 break;
1287 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1289 if (gimple_assign_rhs1 (stmt) == op)
1291 tree cst = build_minus_one_cst (TREE_TYPE (op));
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (cst, stmt, def);
1295 break;
1297 else if (integer_minus_onep (op)
1298 || real_minus_onep (op))
1300 gimple_assign_set_rhs_code
1301 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1302 break;
1307 name = gimple_assign_rhs1 (stmt);
1309 /* If this is the operation we look for and one of the operands
1310 is ours simply propagate the other operand into the stmts
1311 single use. */
1312 if (gimple_assign_rhs_code (stmt) == opcode
1313 && (name == op
1314 || gimple_assign_rhs2 (stmt) == op))
1316 if (name == op)
1317 name = gimple_assign_rhs2 (stmt);
1318 if (stmts_to_fix.length () > 0)
1319 stmts_to_fix.pop ();
1320 propagate_op_to_single_use (name, stmt, def);
1321 break;
1324 /* We might have a multiply of two __builtin_pow* calls, and
1325 the operand might be hiding in the rightmost one. Likewise
1326 this can happen for a negate. */
1327 if (opcode == MULT_EXPR
1328 && gimple_assign_rhs_code (stmt) == opcode
1329 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1330 && has_single_use (gimple_assign_rhs2 (stmt)))
1332 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1333 if (stmt_is_power_of_op (stmt2, op))
1335 if (decrement_power (stmt2) == 1)
1336 propagate_op_to_single_use (op, stmt2, def);
1337 else
1338 stmts_to_fix.safe_push (stmt2);
1339 break;
1341 else if (is_gimple_assign (stmt2)
1342 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1344 if (gimple_assign_rhs1 (stmt2) == op)
1346 tree cst = build_minus_one_cst (TREE_TYPE (op));
1347 propagate_op_to_single_use (cst, stmt2, def);
1348 break;
1350 else if (integer_minus_onep (op)
1351 || real_minus_onep (op))
1353 stmts_to_fix.safe_push (stmt2);
1354 gimple_assign_set_rhs_code
1355 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1356 break;
1361 /* Continue walking the chain. */
1362 gcc_assert (name != op
1363 && TREE_CODE (name) == SSA_NAME);
1364 stmt = SSA_NAME_DEF_STMT (name);
1365 stmts_to_fix.safe_push (stmt);
1367 while (1);
1369 if (stmts_to_fix.length () > 0 || *def == orig_def)
1370 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1373 /* Returns true if statement S1 dominates statement S2. Like
1374 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1376 static bool
1377 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1379 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1381 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1382 SSA_NAME. Assume it lives at the beginning of function and
1383 thus dominates everything. */
1384 if (!bb1 || s1 == s2)
1385 return true;
1387 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1388 if (!bb2)
1389 return false;
1391 if (bb1 == bb2)
1393 /* PHIs in the same basic block are assumed to be
1394 executed all in parallel, if only one stmt is a PHI,
1395 it dominates the other stmt in the same basic block. */
1396 if (gimple_code (s1) == GIMPLE_PHI)
1397 return true;
1399 if (gimple_code (s2) == GIMPLE_PHI)
1400 return false;
1402 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1404 if (gimple_uid (s1) < gimple_uid (s2))
1405 return true;
1407 if (gimple_uid (s1) > gimple_uid (s2))
1408 return false;
1410 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1411 unsigned int uid = gimple_uid (s1);
1412 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1414 gimple *s = gsi_stmt (gsi);
1415 if (gimple_uid (s) != uid)
1416 break;
1417 if (s == s2)
1418 return true;
1421 return false;
1424 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1427 /* Insert STMT after INSERT_POINT. */
1429 static void
1430 insert_stmt_after (gimple *stmt, gimple *insert_point)
1432 gimple_stmt_iterator gsi;
1433 basic_block bb;
1435 if (gimple_code (insert_point) == GIMPLE_PHI)
1436 bb = gimple_bb (insert_point);
1437 else if (!stmt_ends_bb_p (insert_point))
1439 gsi = gsi_for_stmt (insert_point);
1440 gimple_set_uid (stmt, gimple_uid (insert_point));
1441 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1442 return;
1444 else
1445 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1446 thus if it must end a basic block, it should be a call that can
1447 throw, or some assignment that can throw. If it throws, the LHS
1448 of it will not be initialized though, so only valid places using
1449 the SSA_NAME should be dominated by the fallthru edge. */
1450 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1451 gsi = gsi_after_labels (bb);
1452 if (gsi_end_p (gsi))
1454 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1455 gimple_set_uid (stmt,
1456 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1458 else
1459 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1460 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1463 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1464 the result. Places the statement after the definition of either
1465 OP1 or OP2. Returns the new statement. */
1467 static gimple *
1468 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1470 gimple *op1def = NULL, *op2def = NULL;
1471 gimple_stmt_iterator gsi;
1472 tree op;
1473 gassign *sum;
1475 /* Create the addition statement. */
1476 op = make_ssa_name (type);
1477 sum = gimple_build_assign (op, opcode, op1, op2);
1479 /* Find an insertion place and insert. */
1480 if (TREE_CODE (op1) == SSA_NAME)
1481 op1def = SSA_NAME_DEF_STMT (op1);
1482 if (TREE_CODE (op2) == SSA_NAME)
1483 op2def = SSA_NAME_DEF_STMT (op2);
1484 if ((!op1def || gimple_nop_p (op1def))
1485 && (!op2def || gimple_nop_p (op2def)))
1487 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1488 if (gsi_end_p (gsi))
1490 gimple_stmt_iterator gsi2
1491 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1492 gimple_set_uid (sum,
1493 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1495 else
1496 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1497 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1499 else
1501 gimple *insert_point;
1502 if ((!op1def || gimple_nop_p (op1def))
1503 || (op2def && !gimple_nop_p (op2def)
1504 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1505 insert_point = op2def;
1506 else
1507 insert_point = op1def;
1508 insert_stmt_after (sum, insert_point);
1510 update_stmt (sum);
1512 return sum;
1515 /* Perform un-distribution of divisions and multiplications.
1516 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1517 to (A + B) / X for real X.
1519 The algorithm is organized as follows.
1521 - First we walk the addition chain *OPS looking for summands that
1522 are defined by a multiplication or a real division. This results
1523 in the candidates bitmap with relevant indices into *OPS.
1525 - Second we build the chains of multiplications or divisions for
1526 these candidates, counting the number of occurrences of (operand, code)
1527 pairs in all of the candidates chains.
1529 - Third we sort the (operand, code) pairs by number of occurrence and
1530 process them starting with the pair with the most uses.
1532 * For each such pair we walk the candidates again to build a
1533 second candidate bitmap noting all multiplication/division chains
1534 that have at least one occurrence of (operand, code).
1536 * We build an alternate addition chain only covering these
1537 candidates with one (operand, code) operation removed from their
1538 multiplication/division chain.
1540 * The first candidate gets replaced by the alternate addition chain
1541 multiplied/divided by the operand.
1543 * All candidate chains get disabled for further processing and
1544 processing of (operand, code) pairs continues.
1546 The alternate addition chains built are re-processed by the main
1547 reassociation algorithm which allows optimizing a * x * y + b * y * x
1548 to (a + b ) * x * y in one invocation of the reassociation pass. */
1550 static bool
1551 undistribute_ops_list (enum tree_code opcode,
1552 vec<operand_entry *> *ops, struct loop *loop)
1554 unsigned int length = ops->length ();
1555 operand_entry *oe1;
1556 unsigned i, j;
1557 unsigned nr_candidates, nr_candidates2;
1558 sbitmap_iterator sbi0;
1559 vec<operand_entry *> *subops;
1560 bool changed = false;
1561 unsigned int next_oecount_id = 0;
1563 if (length <= 1
1564 || opcode != PLUS_EXPR)
1565 return false;
1567 /* Build a list of candidates to process. */
1568 auto_sbitmap candidates (length);
1569 bitmap_clear (candidates);
1570 nr_candidates = 0;
1571 FOR_EACH_VEC_ELT (*ops, i, oe1)
1573 enum tree_code dcode;
1574 gimple *oe1def;
1576 if (TREE_CODE (oe1->op) != SSA_NAME)
1577 continue;
1578 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1579 if (!is_gimple_assign (oe1def))
1580 continue;
1581 dcode = gimple_assign_rhs_code (oe1def);
1582 if ((dcode != MULT_EXPR
1583 && dcode != RDIV_EXPR)
1584 || !is_reassociable_op (oe1def, dcode, loop))
1585 continue;
1587 bitmap_set_bit (candidates, i);
1588 nr_candidates++;
1591 if (nr_candidates < 2)
1592 return false;
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "searching for un-distribute opportunities ");
1597 print_generic_expr (dump_file,
1598 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1599 fprintf (dump_file, " %d\n", nr_candidates);
1602 /* Build linearized sub-operand lists and the counting table. */
1603 cvec.create (0);
1605 hash_table<oecount_hasher> ctable (15);
1607 /* ??? Macro arguments cannot have multi-argument template types in
1608 them. This typedef is needed to workaround that limitation. */
1609 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1610 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1611 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1613 gimple *oedef;
1614 enum tree_code oecode;
1615 unsigned j;
1617 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1618 oecode = gimple_assign_rhs_code (oedef);
1619 linearize_expr_tree (&subops[i], oedef,
1620 associative_tree_code (oecode), false);
1622 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1624 oecount c;
1625 int *slot;
1626 int idx;
1627 c.oecode = oecode;
1628 c.cnt = 1;
1629 c.id = next_oecount_id++;
1630 c.op = oe1->op;
1631 cvec.safe_push (c);
1632 idx = cvec.length () + 41;
1633 slot = ctable.find_slot (idx, INSERT);
1634 if (!*slot)
1636 *slot = idx;
1638 else
1640 cvec.pop ();
1641 cvec[*slot - 42].cnt++;
1646 /* Sort the counting table. */
1647 cvec.qsort (oecount_cmp);
1649 if (dump_file && (dump_flags & TDF_DETAILS))
1651 oecount *c;
1652 fprintf (dump_file, "Candidates:\n");
1653 FOR_EACH_VEC_ELT (cvec, j, c)
1655 fprintf (dump_file, " %u %s: ", c->cnt,
1656 c->oecode == MULT_EXPR
1657 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1658 print_generic_expr (dump_file, c->op);
1659 fprintf (dump_file, "\n");
1663 /* Process the (operand, code) pairs in order of most occurrence. */
1664 auto_sbitmap candidates2 (length);
1665 while (!cvec.is_empty ())
1667 oecount *c = &cvec.last ();
1668 if (c->cnt < 2)
1669 break;
1671 /* Now collect the operands in the outer chain that contain
1672 the common operand in their inner chain. */
1673 bitmap_clear (candidates2);
1674 nr_candidates2 = 0;
1675 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1677 gimple *oedef;
1678 enum tree_code oecode;
1679 unsigned j;
1680 tree op = (*ops)[i]->op;
1682 /* If we undistributed in this chain already this may be
1683 a constant. */
1684 if (TREE_CODE (op) != SSA_NAME)
1685 continue;
1687 oedef = SSA_NAME_DEF_STMT (op);
1688 oecode = gimple_assign_rhs_code (oedef);
1689 if (oecode != c->oecode)
1690 continue;
1692 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1694 if (oe1->op == c->op)
1696 bitmap_set_bit (candidates2, i);
1697 ++nr_candidates2;
1698 break;
1703 if (nr_candidates2 >= 2)
1705 operand_entry *oe1, *oe2;
1706 gimple *prod;
1707 int first = bitmap_first_set_bit (candidates2);
1709 /* Build the new addition chain. */
1710 oe1 = (*ops)[first];
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Building (");
1714 print_generic_expr (dump_file, oe1->op);
1716 zero_one_operation (&oe1->op, c->oecode, c->op);
1717 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1719 gimple *sum;
1720 oe2 = (*ops)[i];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, " + ");
1724 print_generic_expr (dump_file, oe2->op);
1726 zero_one_operation (&oe2->op, c->oecode, c->op);
1727 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1728 oe1->op, oe2->op, opcode);
1729 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1730 oe2->rank = 0;
1731 oe1->op = gimple_get_lhs (sum);
1734 /* Apply the multiplication/division. */
1735 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1736 oe1->op, c->op, c->oecode);
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1739 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1740 print_generic_expr (dump_file, c->op);
1741 fprintf (dump_file, "\n");
1744 /* Record it in the addition chain and disable further
1745 undistribution with this op. */
1746 oe1->op = gimple_assign_lhs (prod);
1747 oe1->rank = get_rank (oe1->op);
1748 subops[first].release ();
1750 changed = true;
1753 cvec.pop ();
1756 for (i = 0; i < ops->length (); ++i)
1757 subops[i].release ();
1758 free (subops);
1759 cvec.release ();
1761 return changed;
1764 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1765 expression, examine the other OPS to see if any of them are comparisons
1766 of the same values, which we may be able to combine or eliminate.
1767 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1769 static bool
1770 eliminate_redundant_comparison (enum tree_code opcode,
1771 vec<operand_entry *> *ops,
1772 unsigned int currindex,
1773 operand_entry *curr)
1775 tree op1, op2;
1776 enum tree_code lcode, rcode;
1777 gimple *def1, *def2;
1778 int i;
1779 operand_entry *oe;
1781 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1782 return false;
1784 /* Check that CURR is a comparison. */
1785 if (TREE_CODE (curr->op) != SSA_NAME)
1786 return false;
1787 def1 = SSA_NAME_DEF_STMT (curr->op);
1788 if (!is_gimple_assign (def1))
1789 return false;
1790 lcode = gimple_assign_rhs_code (def1);
1791 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1792 return false;
1793 op1 = gimple_assign_rhs1 (def1);
1794 op2 = gimple_assign_rhs2 (def1);
1796 /* Now look for a similar comparison in the remaining OPS. */
1797 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1799 tree t;
1801 if (TREE_CODE (oe->op) != SSA_NAME)
1802 continue;
1803 def2 = SSA_NAME_DEF_STMT (oe->op);
1804 if (!is_gimple_assign (def2))
1805 continue;
1806 rcode = gimple_assign_rhs_code (def2);
1807 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1808 continue;
1810 /* If we got here, we have a match. See if we can combine the
1811 two comparisons. */
1812 if (opcode == BIT_IOR_EXPR)
1813 t = maybe_fold_or_comparisons (lcode, op1, op2,
1814 rcode, gimple_assign_rhs1 (def2),
1815 gimple_assign_rhs2 (def2));
1816 else
1817 t = maybe_fold_and_comparisons (lcode, op1, op2,
1818 rcode, gimple_assign_rhs1 (def2),
1819 gimple_assign_rhs2 (def2));
1820 if (!t)
1821 continue;
1823 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1824 always give us a boolean_type_node value back. If the original
1825 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1826 we need to convert. */
1827 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1828 t = fold_convert (TREE_TYPE (curr->op), t);
1830 if (TREE_CODE (t) != INTEGER_CST
1831 && !operand_equal_p (t, curr->op, 0))
1833 enum tree_code subcode;
1834 tree newop1, newop2;
1835 if (!COMPARISON_CLASS_P (t))
1836 continue;
1837 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1838 STRIP_USELESS_TYPE_CONVERSION (newop1);
1839 STRIP_USELESS_TYPE_CONVERSION (newop2);
1840 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1841 continue;
1844 if (dump_file && (dump_flags & TDF_DETAILS))
1846 fprintf (dump_file, "Equivalence: ");
1847 print_generic_expr (dump_file, curr->op);
1848 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1849 print_generic_expr (dump_file, oe->op);
1850 fprintf (dump_file, " -> ");
1851 print_generic_expr (dump_file, t);
1852 fprintf (dump_file, "\n");
1855 /* Now we can delete oe, as it has been subsumed by the new combined
1856 expression t. */
1857 ops->ordered_remove (i);
1858 reassociate_stats.ops_eliminated ++;
1860 /* If t is the same as curr->op, we're done. Otherwise we must
1861 replace curr->op with t. Special case is if we got a constant
1862 back, in which case we add it to the end instead of in place of
1863 the current entry. */
1864 if (TREE_CODE (t) == INTEGER_CST)
1866 ops->ordered_remove (currindex);
1867 add_to_ops_vec (ops, t);
1869 else if (!operand_equal_p (t, curr->op, 0))
1871 gimple *sum;
1872 enum tree_code subcode;
1873 tree newop1;
1874 tree newop2;
1875 gcc_assert (COMPARISON_CLASS_P (t));
1876 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1877 STRIP_USELESS_TYPE_CONVERSION (newop1);
1878 STRIP_USELESS_TYPE_CONVERSION (newop2);
1879 gcc_checking_assert (is_gimple_val (newop1)
1880 && is_gimple_val (newop2));
1881 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1882 curr->op = gimple_get_lhs (sum);
1884 return true;
1887 return false;
1891 /* Transform repeated addition of same values into multiply with
1892 constant. */
1893 static bool
1894 transform_add_to_multiply (vec<operand_entry *> *ops)
1896 operand_entry *oe;
1897 tree op = NULL_TREE;
1898 int j;
1899 int i, start = -1, end = 0, count = 0;
1900 auto_vec<std::pair <int, int> > indxs;
1901 bool changed = false;
1903 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1904 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1905 || !flag_unsafe_math_optimizations))
1906 return false;
1908 /* Look for repeated operands. */
1909 FOR_EACH_VEC_ELT (*ops, i, oe)
1911 if (start == -1)
1913 count = 1;
1914 op = oe->op;
1915 start = i;
1917 else if (operand_equal_p (oe->op, op, 0))
1919 count++;
1920 end = i;
1922 else
1924 if (count > 1)
1925 indxs.safe_push (std::make_pair (start, end));
1926 count = 1;
1927 op = oe->op;
1928 start = i;
1932 if (count > 1)
1933 indxs.safe_push (std::make_pair (start, end));
1935 for (j = indxs.length () - 1; j >= 0; --j)
1937 /* Convert repeated operand addition to multiplication. */
1938 start = indxs[j].first;
1939 end = indxs[j].second;
1940 op = (*ops)[start]->op;
1941 count = end - start + 1;
1942 for (i = end; i >= start; --i)
1943 ops->unordered_remove (i);
1944 tree tmp = make_ssa_name (TREE_TYPE (op));
1945 tree cst = build_int_cst (integer_type_node, count);
1946 gassign *mul_stmt
1947 = gimple_build_assign (tmp, MULT_EXPR,
1948 op, fold_convert (TREE_TYPE (op), cst));
1949 gimple_set_visited (mul_stmt, true);
1950 add_to_ops_vec (ops, tmp, mul_stmt);
1951 changed = true;
1954 return changed;
1958 /* Perform various identities and other optimizations on the list of
1959 operand entries, stored in OPS. The tree code for the binary
1960 operation between all the operands is OPCODE. */
1962 static void
1963 optimize_ops_list (enum tree_code opcode,
1964 vec<operand_entry *> *ops)
1966 unsigned int length = ops->length ();
1967 unsigned int i;
1968 operand_entry *oe;
1969 operand_entry *oelast = NULL;
1970 bool iterate = false;
1972 if (length == 1)
1973 return;
1975 oelast = ops->last ();
1977 /* If the last two are constants, pop the constants off, merge them
1978 and try the next two. */
1979 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1981 operand_entry *oelm1 = (*ops)[length - 2];
1983 if (oelm1->rank == 0
1984 && is_gimple_min_invariant (oelm1->op)
1985 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1986 TREE_TYPE (oelast->op)))
1988 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1989 oelm1->op, oelast->op);
1991 if (folded && is_gimple_min_invariant (folded))
1993 if (dump_file && (dump_flags & TDF_DETAILS))
1994 fprintf (dump_file, "Merging constants\n");
1996 ops->pop ();
1997 ops->pop ();
1999 add_to_ops_vec (ops, folded);
2000 reassociate_stats.constants_eliminated++;
2002 optimize_ops_list (opcode, ops);
2003 return;
2008 eliminate_using_constants (opcode, ops);
2009 oelast = NULL;
2011 for (i = 0; ops->iterate (i, &oe);)
2013 bool done = false;
2015 if (eliminate_not_pairs (opcode, ops, i, oe))
2016 return;
2017 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2018 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2019 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2021 if (done)
2022 return;
2023 iterate = true;
2024 oelast = NULL;
2025 continue;
2027 oelast = oe;
2028 i++;
2031 length = ops->length ();
2032 oelast = ops->last ();
2034 if (iterate)
2035 optimize_ops_list (opcode, ops);
2038 /* The following functions are subroutines to optimize_range_tests and allow
2039 it to try to change a logical combination of comparisons into a range
2040 test.
2042 For example, both
2043 X == 2 || X == 5 || X == 3 || X == 4
2045 X >= 2 && X <= 5
2046 are converted to
2047 (unsigned) (X - 2) <= 3
2049 For more information see comments above fold_test_range in fold-const.c,
2050 this implementation is for GIMPLE. */
2052 struct range_entry
2054 tree exp;
2055 tree low;
2056 tree high;
2057 bool in_p;
2058 bool strict_overflow_p;
2059 unsigned int idx, next;
2062 /* This is similar to make_range in fold-const.c, but on top of
2063 GIMPLE instead of trees. If EXP is non-NULL, it should be
2064 an SSA_NAME and STMT argument is ignored, otherwise STMT
2065 argument should be a GIMPLE_COND. */
2067 static void
2068 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2070 int in_p;
2071 tree low, high;
2072 bool is_bool, strict_overflow_p;
2074 r->exp = NULL_TREE;
2075 r->in_p = false;
2076 r->strict_overflow_p = false;
2077 r->low = NULL_TREE;
2078 r->high = NULL_TREE;
2079 if (exp != NULL_TREE
2080 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2081 return;
2083 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2084 and see if we can refine the range. Some of the cases below may not
2085 happen, but it doesn't seem worth worrying about this. We "continue"
2086 the outer loop when we've changed something; otherwise we "break"
2087 the switch, which will "break" the while. */
2088 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2089 high = low;
2090 in_p = 0;
2091 strict_overflow_p = false;
2092 is_bool = false;
2093 if (exp == NULL_TREE)
2094 is_bool = true;
2095 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2097 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2098 is_bool = true;
2099 else
2100 return;
2102 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2103 is_bool = true;
2105 while (1)
2107 enum tree_code code;
2108 tree arg0, arg1, exp_type;
2109 tree nexp;
2110 location_t loc;
2112 if (exp != NULL_TREE)
2114 if (TREE_CODE (exp) != SSA_NAME
2115 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2116 break;
2118 stmt = SSA_NAME_DEF_STMT (exp);
2119 if (!is_gimple_assign (stmt))
2120 break;
2122 code = gimple_assign_rhs_code (stmt);
2123 arg0 = gimple_assign_rhs1 (stmt);
2124 arg1 = gimple_assign_rhs2 (stmt);
2125 exp_type = TREE_TYPE (exp);
2127 else
2129 code = gimple_cond_code (stmt);
2130 arg0 = gimple_cond_lhs (stmt);
2131 arg1 = gimple_cond_rhs (stmt);
2132 exp_type = boolean_type_node;
2135 if (TREE_CODE (arg0) != SSA_NAME)
2136 break;
2137 loc = gimple_location (stmt);
2138 switch (code)
2140 case BIT_NOT_EXPR:
2141 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2142 /* Ensure the range is either +[-,0], +[0,0],
2143 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2144 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2145 or similar expression of unconditional true or
2146 false, it should not be negated. */
2147 && ((high && integer_zerop (high))
2148 || (low && integer_onep (low))))
2150 in_p = !in_p;
2151 exp = arg0;
2152 continue;
2154 break;
2155 case SSA_NAME:
2156 exp = arg0;
2157 continue;
2158 CASE_CONVERT:
2159 if (is_bool)
2160 goto do_default;
2161 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2163 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2164 is_bool = true;
2165 else
2166 return;
2168 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2169 is_bool = true;
2170 goto do_default;
2171 case EQ_EXPR:
2172 case NE_EXPR:
2173 case LT_EXPR:
2174 case LE_EXPR:
2175 case GE_EXPR:
2176 case GT_EXPR:
2177 is_bool = true;
2178 /* FALLTHRU */
2179 default:
2180 if (!is_bool)
2181 return;
2182 do_default:
2183 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2184 &low, &high, &in_p,
2185 &strict_overflow_p);
2186 if (nexp != NULL_TREE)
2188 exp = nexp;
2189 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2190 continue;
2192 break;
2194 break;
2196 if (is_bool)
2198 r->exp = exp;
2199 r->in_p = in_p;
2200 r->low = low;
2201 r->high = high;
2202 r->strict_overflow_p = strict_overflow_p;
2206 /* Comparison function for qsort. Sort entries
2207 without SSA_NAME exp first, then with SSA_NAMEs sorted
2208 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2209 by increasing ->low and if ->low is the same, by increasing
2210 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2211 maximum. */
2213 static int
2214 range_entry_cmp (const void *a, const void *b)
2216 const struct range_entry *p = (const struct range_entry *) a;
2217 const struct range_entry *q = (const struct range_entry *) b;
2219 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2221 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2223 /* Group range_entries for the same SSA_NAME together. */
2224 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2225 return -1;
2226 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2227 return 1;
2228 /* If ->low is different, NULL low goes first, then by
2229 ascending low. */
2230 if (p->low != NULL_TREE)
2232 if (q->low != NULL_TREE)
2234 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2235 p->low, q->low);
2236 if (tem && integer_onep (tem))
2237 return -1;
2238 tem = fold_binary (GT_EXPR, boolean_type_node,
2239 p->low, q->low);
2240 if (tem && integer_onep (tem))
2241 return 1;
2243 else
2244 return 1;
2246 else if (q->low != NULL_TREE)
2247 return -1;
2248 /* If ->high is different, NULL high goes last, before that by
2249 ascending high. */
2250 if (p->high != NULL_TREE)
2252 if (q->high != NULL_TREE)
2254 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2255 p->high, q->high);
2256 if (tem && integer_onep (tem))
2257 return -1;
2258 tem = fold_binary (GT_EXPR, boolean_type_node,
2259 p->high, q->high);
2260 if (tem && integer_onep (tem))
2261 return 1;
2263 else
2264 return -1;
2266 else if (q->high != NULL_TREE)
2267 return 1;
2268 /* If both ranges are the same, sort below by ascending idx. */
2270 else
2271 return 1;
2273 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2274 return -1;
2276 if (p->idx < q->idx)
2277 return -1;
2278 else
2280 gcc_checking_assert (p->idx > q->idx);
2281 return 1;
2285 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2286 insert needed statements BEFORE or after GSI. */
2288 static tree
2289 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2291 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2292 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2293 if (TREE_CODE (ret) != SSA_NAME)
2295 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2296 if (before)
2297 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2298 else
2299 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2300 ret = gimple_assign_lhs (g);
2302 return ret;
2305 /* Helper routine of optimize_range_test.
2306 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2307 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2308 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2309 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2310 an array of COUNT pointers to other ranges. Return
2311 true if the range merge has been successful.
2312 If OPCODE is ERROR_MARK, this is called from within
2313 maybe_optimize_range_tests and is performing inter-bb range optimization.
2314 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2315 oe->rank. */
2317 static bool
2318 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2319 struct range_entry **otherrangep,
2320 unsigned int count, enum tree_code opcode,
2321 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2322 bool in_p, tree low, tree high, bool strict_overflow_p)
2324 operand_entry *oe = (*ops)[range->idx];
2325 tree op = oe->op;
2326 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2327 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2328 location_t loc = gimple_location (stmt);
2329 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2330 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2331 in_p, low, high);
2332 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2333 gimple_stmt_iterator gsi;
2334 unsigned int i, uid;
2336 if (tem == NULL_TREE)
2337 return false;
2339 /* If op is default def SSA_NAME, there is no place to insert the
2340 new comparison. Give up, unless we can use OP itself as the
2341 range test. */
2342 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2344 if (op == range->exp
2345 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2346 || TREE_CODE (optype) == BOOLEAN_TYPE)
2347 && (op == tem
2348 || (TREE_CODE (tem) == EQ_EXPR
2349 && TREE_OPERAND (tem, 0) == op
2350 && integer_onep (TREE_OPERAND (tem, 1))))
2351 && opcode != BIT_IOR_EXPR
2352 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2354 stmt = NULL;
2355 tem = op;
2357 else
2358 return false;
2361 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2362 warning_at (loc, OPT_Wstrict_overflow,
2363 "assuming signed overflow does not occur "
2364 "when simplifying range test");
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2368 struct range_entry *r;
2369 fprintf (dump_file, "Optimizing range tests ");
2370 print_generic_expr (dump_file, range->exp);
2371 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2372 print_generic_expr (dump_file, range->low);
2373 fprintf (dump_file, ", ");
2374 print_generic_expr (dump_file, range->high);
2375 fprintf (dump_file, "]");
2376 for (i = 0; i < count; i++)
2378 if (otherrange)
2379 r = otherrange + i;
2380 else
2381 r = otherrangep[i];
2382 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2383 print_generic_expr (dump_file, r->low);
2384 fprintf (dump_file, ", ");
2385 print_generic_expr (dump_file, r->high);
2386 fprintf (dump_file, "]");
2388 fprintf (dump_file, "\n into ");
2389 print_generic_expr (dump_file, tem);
2390 fprintf (dump_file, "\n");
2393 if (opcode == BIT_IOR_EXPR
2394 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2395 tem = invert_truthvalue_loc (loc, tem);
2397 tem = fold_convert_loc (loc, optype, tem);
2398 if (stmt)
2400 gsi = gsi_for_stmt (stmt);
2401 uid = gimple_uid (stmt);
2403 else
2405 gsi = gsi_none ();
2406 uid = 0;
2408 if (stmt == NULL)
2409 gcc_checking_assert (tem == op);
2410 /* In rare cases range->exp can be equal to lhs of stmt.
2411 In that case we have to insert after the stmt rather then before
2412 it. If stmt is a PHI, insert it at the start of the basic block. */
2413 else if (op != range->exp)
2415 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2416 tem = force_into_ssa_name (&gsi, tem, true);
2417 gsi_prev (&gsi);
2419 else if (gimple_code (stmt) != GIMPLE_PHI)
2421 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2422 tem = force_into_ssa_name (&gsi, tem, false);
2424 else
2426 gsi = gsi_after_labels (gimple_bb (stmt));
2427 if (!gsi_end_p (gsi))
2428 uid = gimple_uid (gsi_stmt (gsi));
2429 else
2431 gsi = gsi_start_bb (gimple_bb (stmt));
2432 uid = 1;
2433 while (!gsi_end_p (gsi))
2435 uid = gimple_uid (gsi_stmt (gsi));
2436 gsi_next (&gsi);
2439 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2440 tem = force_into_ssa_name (&gsi, tem, true);
2441 if (gsi_end_p (gsi))
2442 gsi = gsi_last_bb (gimple_bb (stmt));
2443 else
2444 gsi_prev (&gsi);
2446 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2447 if (gimple_uid (gsi_stmt (gsi)))
2448 break;
2449 else
2450 gimple_set_uid (gsi_stmt (gsi), uid);
2452 oe->op = tem;
2453 range->exp = exp;
2454 range->low = low;
2455 range->high = high;
2456 range->in_p = in_p;
2457 range->strict_overflow_p = false;
2459 for (i = 0; i < count; i++)
2461 if (otherrange)
2462 range = otherrange + i;
2463 else
2464 range = otherrangep[i];
2465 oe = (*ops)[range->idx];
2466 /* Now change all the other range test immediate uses, so that
2467 those tests will be optimized away. */
2468 if (opcode == ERROR_MARK)
2470 if (oe->op)
2471 oe->op = build_int_cst (TREE_TYPE (oe->op),
2472 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2473 else
2474 oe->op = (oe->rank == BIT_IOR_EXPR
2475 ? boolean_false_node : boolean_true_node);
2477 else
2478 oe->op = error_mark_node;
2479 range->exp = NULL_TREE;
2480 range->low = NULL_TREE;
2481 range->high = NULL_TREE;
2483 return true;
2486 /* Optimize X == CST1 || X == CST2
2487 if popcount (CST1 ^ CST2) == 1 into
2488 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2489 Similarly for ranges. E.g.
2490 X != 2 && X != 3 && X != 10 && X != 11
2491 will be transformed by the previous optimization into
2492 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2493 and this loop can transform that into
2494 !(((X & ~8) - 2U) <= 1U). */
2496 static bool
2497 optimize_range_tests_xor (enum tree_code opcode, tree type,
2498 tree lowi, tree lowj, tree highi, tree highj,
2499 vec<operand_entry *> *ops,
2500 struct range_entry *rangei,
2501 struct range_entry *rangej)
2503 tree lowxor, highxor, tem, exp;
2504 /* Check lowi ^ lowj == highi ^ highj and
2505 popcount (lowi ^ lowj) == 1. */
2506 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2507 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2508 return false;
2509 if (!integer_pow2p (lowxor))
2510 return false;
2511 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2512 if (!tree_int_cst_equal (lowxor, highxor))
2513 return false;
2515 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2516 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2517 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2518 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2519 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2520 NULL, rangei->in_p, lowj, highj,
2521 rangei->strict_overflow_p
2522 || rangej->strict_overflow_p))
2523 return true;
2524 return false;
2527 /* Optimize X == CST1 || X == CST2
2528 if popcount (CST2 - CST1) == 1 into
2529 ((X - CST1) & ~(CST2 - CST1)) == 0.
2530 Similarly for ranges. E.g.
2531 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2532 || X == 75 || X == 45
2533 will be transformed by the previous optimization into
2534 (X - 43U) <= 3U || (X - 75U) <= 3U
2535 and this loop can transform that into
2536 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2537 static bool
2538 optimize_range_tests_diff (enum tree_code opcode, tree type,
2539 tree lowi, tree lowj, tree highi, tree highj,
2540 vec<operand_entry *> *ops,
2541 struct range_entry *rangei,
2542 struct range_entry *rangej)
2544 tree tem1, tem2, mask;
2545 /* Check highi - lowi == highj - lowj. */
2546 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2547 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2548 return false;
2549 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2550 if (!tree_int_cst_equal (tem1, tem2))
2551 return false;
2552 /* Check popcount (lowj - lowi) == 1. */
2553 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2554 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2555 return false;
2556 if (!integer_pow2p (tem1))
2557 return false;
2559 type = unsigned_type_for (type);
2560 tem1 = fold_convert (type, tem1);
2561 tem2 = fold_convert (type, tem2);
2562 lowi = fold_convert (type, lowi);
2563 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2564 tem1 = fold_build2 (MINUS_EXPR, type,
2565 fold_convert (type, rangei->exp), lowi);
2566 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2567 lowj = build_int_cst (type, 0);
2568 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2569 NULL, rangei->in_p, lowj, tem2,
2570 rangei->strict_overflow_p
2571 || rangej->strict_overflow_p))
2572 return true;
2573 return false;
2576 /* It does some common checks for function optimize_range_tests_xor and
2577 optimize_range_tests_diff.
2578 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2579 Else it calls optimize_range_tests_diff. */
2581 static bool
2582 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2583 bool optimize_xor, vec<operand_entry *> *ops,
2584 struct range_entry *ranges)
2586 int i, j;
2587 bool any_changes = false;
2588 for (i = first; i < length; i++)
2590 tree lowi, highi, lowj, highj, type, tem;
2592 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2593 continue;
2594 type = TREE_TYPE (ranges[i].exp);
2595 if (!INTEGRAL_TYPE_P (type))
2596 continue;
2597 lowi = ranges[i].low;
2598 if (lowi == NULL_TREE)
2599 lowi = TYPE_MIN_VALUE (type);
2600 highi = ranges[i].high;
2601 if (highi == NULL_TREE)
2602 continue;
2603 for (j = i + 1; j < length && j < i + 64; j++)
2605 bool changes;
2606 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2607 continue;
2608 lowj = ranges[j].low;
2609 if (lowj == NULL_TREE)
2610 continue;
2611 highj = ranges[j].high;
2612 if (highj == NULL_TREE)
2613 highj = TYPE_MAX_VALUE (type);
2614 /* Check lowj > highi. */
2615 tem = fold_binary (GT_EXPR, boolean_type_node,
2616 lowj, highi);
2617 if (tem == NULL_TREE || !integer_onep (tem))
2618 continue;
2619 if (optimize_xor)
2620 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2621 highi, highj, ops,
2622 ranges + i, ranges + j);
2623 else
2624 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2625 highi, highj, ops,
2626 ranges + i, ranges + j);
2627 if (changes)
2629 any_changes = true;
2630 break;
2634 return any_changes;
2637 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2638 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2639 EXP on success, NULL otherwise. */
2641 static tree
2642 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2643 wide_int *mask, tree *totallowp)
2645 tree tem = int_const_binop (MINUS_EXPR, high, low);
2646 if (tem == NULL_TREE
2647 || TREE_CODE (tem) != INTEGER_CST
2648 || TREE_OVERFLOW (tem)
2649 || tree_int_cst_sgn (tem) == -1
2650 || compare_tree_int (tem, prec) != -1)
2651 return NULL_TREE;
2653 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2654 *mask = wi::shifted_mask (0, max, false, prec);
2655 if (TREE_CODE (exp) == BIT_AND_EXPR
2656 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2658 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2659 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2660 if (wi::popcount (msk) == 1
2661 && wi::ltu_p (msk, prec - max))
2663 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2664 max += msk.to_uhwi ();
2665 exp = TREE_OPERAND (exp, 0);
2666 if (integer_zerop (low)
2667 && TREE_CODE (exp) == PLUS_EXPR
2668 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2670 tree ret = TREE_OPERAND (exp, 0);
2671 STRIP_NOPS (ret);
2672 widest_int bias
2673 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2674 TYPE_PRECISION (TREE_TYPE (low))));
2675 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2676 if (totallowp)
2678 *totallowp = tbias;
2679 return ret;
2681 else if (!tree_int_cst_lt (totallow, tbias))
2682 return NULL_TREE;
2683 bias = wi::to_widest (tbias);
2684 bias -= wi::to_widest (totallow);
2685 if (bias >= 0 && bias < prec - max)
2687 *mask = wi::lshift (*mask, bias);
2688 return ret;
2693 if (totallowp)
2694 return exp;
2695 if (!tree_int_cst_lt (totallow, low))
2696 return exp;
2697 tem = int_const_binop (MINUS_EXPR, low, totallow);
2698 if (tem == NULL_TREE
2699 || TREE_CODE (tem) != INTEGER_CST
2700 || TREE_OVERFLOW (tem)
2701 || compare_tree_int (tem, prec - max) == 1)
2702 return NULL_TREE;
2704 *mask = wi::lshift (*mask, wi::to_widest (tem));
2705 return exp;
2708 /* Attempt to optimize small range tests using bit test.
2709 E.g.
2710 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2711 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2712 has been by earlier optimizations optimized into:
2713 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2714 As all the 43 through 82 range is less than 64 numbers,
2715 for 64-bit word targets optimize that into:
2716 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2718 static bool
2719 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2720 vec<operand_entry *> *ops,
2721 struct range_entry *ranges)
2723 int i, j;
2724 bool any_changes = false;
2725 int prec = GET_MODE_BITSIZE (word_mode);
2726 auto_vec<struct range_entry *, 64> candidates;
2728 for (i = first; i < length - 2; i++)
2730 tree lowi, highi, lowj, highj, type;
2732 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2733 continue;
2734 type = TREE_TYPE (ranges[i].exp);
2735 if (!INTEGRAL_TYPE_P (type))
2736 continue;
2737 lowi = ranges[i].low;
2738 if (lowi == NULL_TREE)
2739 lowi = TYPE_MIN_VALUE (type);
2740 highi = ranges[i].high;
2741 if (highi == NULL_TREE)
2742 continue;
2743 wide_int mask;
2744 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2745 highi, &mask, &lowi);
2746 if (exp == NULL_TREE)
2747 continue;
2748 bool strict_overflow_p = ranges[i].strict_overflow_p;
2749 candidates.truncate (0);
2750 int end = MIN (i + 64, length);
2751 for (j = i + 1; j < end; j++)
2753 tree exp2;
2754 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2755 continue;
2756 if (ranges[j].exp == exp)
2758 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2760 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2761 if (exp2 == exp)
2763 else if (TREE_CODE (exp2) == PLUS_EXPR)
2765 exp2 = TREE_OPERAND (exp2, 0);
2766 STRIP_NOPS (exp2);
2767 if (exp2 != exp)
2768 continue;
2770 else
2771 continue;
2773 else
2774 continue;
2775 lowj = ranges[j].low;
2776 if (lowj == NULL_TREE)
2777 continue;
2778 highj = ranges[j].high;
2779 if (highj == NULL_TREE)
2780 highj = TYPE_MAX_VALUE (type);
2781 wide_int mask2;
2782 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2783 highj, &mask2, NULL);
2784 if (exp2 != exp)
2785 continue;
2786 mask |= mask2;
2787 strict_overflow_p |= ranges[j].strict_overflow_p;
2788 candidates.safe_push (&ranges[j]);
2791 /* If we need otherwise 3 or more comparisons, use a bit test. */
2792 if (candidates.length () >= 2)
2794 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2795 wi::to_widest (lowi)
2796 + prec - 1 - wi::clz (mask));
2797 operand_entry *oe = (*ops)[ranges[i].idx];
2798 tree op = oe->op;
2799 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2800 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2801 location_t loc = gimple_location (stmt);
2802 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2804 /* See if it isn't cheaper to pretend the minimum value of the
2805 range is 0, if maximum value is small enough.
2806 We can avoid then subtraction of the minimum value, but the
2807 mask constant could be perhaps more expensive. */
2808 if (compare_tree_int (lowi, 0) > 0
2809 && compare_tree_int (high, prec) < 0)
2811 int cost_diff;
2812 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2813 rtx reg = gen_raw_REG (word_mode, 10000);
2814 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2815 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2816 GEN_INT (-m)), speed_p);
2817 rtx r = immed_wide_int_const (mask, word_mode);
2818 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2819 word_mode, speed_p);
2820 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2821 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2822 word_mode, speed_p);
2823 if (cost_diff > 0)
2825 mask = wi::lshift (mask, m);
2826 lowi = build_zero_cst (TREE_TYPE (lowi));
2830 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2831 false, lowi, high);
2832 if (tem == NULL_TREE || is_gimple_val (tem))
2833 continue;
2834 tree etype = unsigned_type_for (TREE_TYPE (exp));
2835 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2836 fold_convert_loc (loc, etype, exp),
2837 fold_convert_loc (loc, etype, lowi));
2838 exp = fold_convert_loc (loc, integer_type_node, exp);
2839 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2840 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2841 build_int_cst (word_type, 1), exp);
2842 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2843 wide_int_to_tree (word_type, mask));
2844 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2845 build_zero_cst (word_type));
2846 if (is_gimple_val (exp))
2847 continue;
2849 /* The shift might have undefined behavior if TEM is true,
2850 but reassociate_bb isn't prepared to have basic blocks
2851 split when it is running. So, temporarily emit a code
2852 with BIT_IOR_EXPR instead of &&, and fix it up in
2853 branch_fixup. */
2854 gimple_seq seq;
2855 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2856 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2857 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2858 gimple_seq seq2;
2859 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2860 gimple_seq_add_seq_without_update (&seq, seq2);
2861 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2862 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2863 gimple *g = gimple_build_assign (make_ssa_name (optype),
2864 BIT_IOR_EXPR, tem, exp);
2865 gimple_set_location (g, loc);
2866 gimple_seq_add_stmt_without_update (&seq, g);
2867 exp = gimple_assign_lhs (g);
2868 tree val = build_zero_cst (optype);
2869 if (update_range_test (&ranges[i], NULL, candidates.address (),
2870 candidates.length (), opcode, ops, exp,
2871 seq, false, val, val, strict_overflow_p))
2873 any_changes = true;
2874 reassoc_branch_fixups.safe_push (tem);
2876 else
2877 gimple_seq_discard (seq);
2880 return any_changes;
2883 /* Attempt to optimize for signed a and b where b is known to be >= 0:
2884 a >= 0 && a < b into (unsigned) a < (unsigned) b
2885 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
2887 static bool
2888 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
2889 vec<operand_entry *> *ops,
2890 struct range_entry *ranges)
2892 int i;
2893 bool any_changes = false;
2894 hash_map<tree, int> *map = NULL;
2896 for (i = first; i < length; i++)
2898 if (ranges[i].exp == NULL_TREE
2899 || TREE_CODE (ranges[i].exp) != SSA_NAME
2900 || !ranges[i].in_p)
2901 continue;
2903 tree type = TREE_TYPE (ranges[i].exp);
2904 if (!INTEGRAL_TYPE_P (type)
2905 || TYPE_UNSIGNED (type)
2906 || ranges[i].low == NULL_TREE
2907 || !integer_zerop (ranges[i].low)
2908 || ranges[i].high != NULL_TREE)
2909 continue;
2910 /* EXP >= 0 here. */
2911 if (map == NULL)
2912 map = new hash_map <tree, int>;
2913 map->put (ranges[i].exp, i);
2916 if (map == NULL)
2917 return false;
2919 for (i = 0; i < length; i++)
2921 bool in_p = ranges[i].in_p;
2922 if (ranges[i].low == NULL_TREE
2923 || ranges[i].high == NULL_TREE)
2924 continue;
2925 if (!integer_zerop (ranges[i].low)
2926 || !integer_zerop (ranges[i].high))
2928 if (ranges[i].exp
2929 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
2930 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
2931 && integer_onep (ranges[i].low)
2932 && integer_onep (ranges[i].high))
2933 in_p = !in_p;
2934 else
2935 continue;
2938 gimple *stmt;
2939 tree_code ccode;
2940 tree rhs1, rhs2;
2941 if (ranges[i].exp)
2943 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
2944 continue;
2945 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
2946 if (!is_gimple_assign (stmt))
2947 continue;
2948 ccode = gimple_assign_rhs_code (stmt);
2949 rhs1 = gimple_assign_rhs1 (stmt);
2950 rhs2 = gimple_assign_rhs2 (stmt);
2952 else
2954 operand_entry *oe = (*ops)[ranges[i].idx];
2955 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2956 if (gimple_code (stmt) != GIMPLE_COND)
2957 continue;
2958 ccode = gimple_cond_code (stmt);
2959 rhs1 = gimple_cond_lhs (stmt);
2960 rhs2 = gimple_cond_rhs (stmt);
2963 if (TREE_CODE (rhs1) != SSA_NAME
2964 || rhs2 == NULL_TREE
2965 || TREE_CODE (rhs2) != SSA_NAME)
2966 continue;
2968 switch (ccode)
2970 case GT_EXPR:
2971 case GE_EXPR:
2972 case LT_EXPR:
2973 case LE_EXPR:
2974 break;
2975 default:
2976 continue;
2978 if (in_p)
2979 ccode = invert_tree_comparison (ccode, false);
2980 switch (ccode)
2982 case GT_EXPR:
2983 case GE_EXPR:
2984 std::swap (rhs1, rhs2);
2985 ccode = swap_tree_comparison (ccode);
2986 break;
2987 case LT_EXPR:
2988 case LE_EXPR:
2989 break;
2990 default:
2991 gcc_unreachable ();
2994 int *idx = map->get (rhs1);
2995 if (idx == NULL)
2996 continue;
2998 wide_int nz = get_nonzero_bits (rhs2);
2999 if (wi::neg_p (nz))
3000 continue;
3002 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3003 and RHS2 is known to be RHS2 >= 0. */
3004 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3006 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3007 if ((ranges[*idx].strict_overflow_p
3008 || ranges[i].strict_overflow_p)
3009 && issue_strict_overflow_warning (wc))
3010 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3011 "assuming signed overflow does not occur "
3012 "when simplifying range test");
3014 if (dump_file && (dump_flags & TDF_DETAILS))
3016 struct range_entry *r = &ranges[*idx];
3017 fprintf (dump_file, "Optimizing range test ");
3018 print_generic_expr (dump_file, r->exp);
3019 fprintf (dump_file, " +[");
3020 print_generic_expr (dump_file, r->low);
3021 fprintf (dump_file, ", ");
3022 print_generic_expr (dump_file, r->high);
3023 fprintf (dump_file, "] and comparison ");
3024 print_generic_expr (dump_file, rhs1);
3025 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3026 print_generic_expr (dump_file, rhs2);
3027 fprintf (dump_file, "\n into (");
3028 print_generic_expr (dump_file, utype);
3029 fprintf (dump_file, ") ");
3030 print_generic_expr (dump_file, rhs1);
3031 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3032 print_generic_expr (dump_file, utype);
3033 fprintf (dump_file, ") ");
3034 print_generic_expr (dump_file, rhs2);
3035 fprintf (dump_file, "\n");
3038 operand_entry *oe = (*ops)[ranges[i].idx];
3039 ranges[i].in_p = 0;
3040 if (opcode == BIT_IOR_EXPR
3041 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3043 ranges[i].in_p = 1;
3044 ccode = invert_tree_comparison (ccode, false);
3047 unsigned int uid = gimple_uid (stmt);
3048 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3049 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3050 gimple_set_uid (g, uid);
3051 rhs1 = gimple_assign_lhs (g);
3052 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3053 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3054 gimple_set_uid (g, uid);
3055 rhs2 = gimple_assign_lhs (g);
3056 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3057 if (tree_swap_operands_p (rhs1, rhs2))
3059 std::swap (rhs1, rhs2);
3060 ccode = swap_tree_comparison (ccode);
3062 if (gimple_code (stmt) == GIMPLE_COND)
3064 gcond *c = as_a <gcond *> (stmt);
3065 gimple_cond_set_code (c, ccode);
3066 gimple_cond_set_lhs (c, rhs1);
3067 gimple_cond_set_rhs (c, rhs2);
3068 update_stmt (stmt);
3070 else
3072 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3073 if (!INTEGRAL_TYPE_P (ctype)
3074 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3075 && TYPE_PRECISION (ctype) != 1))
3076 ctype = boolean_type_node;
3077 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3078 gimple_set_uid (g, uid);
3079 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3080 if (oe->op && ctype != TREE_TYPE (oe->op))
3082 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3083 NOP_EXPR, gimple_assign_lhs (g));
3084 gimple_set_uid (g, uid);
3085 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3087 ranges[i].exp = gimple_assign_lhs (g);
3088 oe->op = ranges[i].exp;
3089 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3090 ranges[i].high = ranges[i].low;
3092 ranges[i].strict_overflow_p = false;
3093 oe = (*ops)[ranges[*idx].idx];
3094 /* Now change all the other range test immediate uses, so that
3095 those tests will be optimized away. */
3096 if (opcode == ERROR_MARK)
3098 if (oe->op)
3099 oe->op = build_int_cst (TREE_TYPE (oe->op),
3100 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3101 else
3102 oe->op = (oe->rank == BIT_IOR_EXPR
3103 ? boolean_false_node : boolean_true_node);
3105 else
3106 oe->op = error_mark_node;
3107 ranges[*idx].exp = NULL_TREE;
3108 ranges[*idx].low = NULL_TREE;
3109 ranges[*idx].high = NULL_TREE;
3110 any_changes = true;
3113 delete map;
3114 return any_changes;
3117 /* Optimize range tests, similarly how fold_range_test optimizes
3118 it on trees. The tree code for the binary
3119 operation between all the operands is OPCODE.
3120 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3121 maybe_optimize_range_tests for inter-bb range optimization.
3122 In that case if oe->op is NULL, oe->id is bb->index whose
3123 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3124 the actual opcode. */
3126 static bool
3127 optimize_range_tests (enum tree_code opcode,
3128 vec<operand_entry *> *ops)
3130 unsigned int length = ops->length (), i, j, first;
3131 operand_entry *oe;
3132 struct range_entry *ranges;
3133 bool any_changes = false;
3135 if (length == 1)
3136 return false;
3138 ranges = XNEWVEC (struct range_entry, length);
3139 for (i = 0; i < length; i++)
3141 oe = (*ops)[i];
3142 ranges[i].idx = i;
3143 init_range_entry (ranges + i, oe->op,
3144 oe->op
3145 ? NULL
3146 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3147 /* For | invert it now, we will invert it again before emitting
3148 the optimized expression. */
3149 if (opcode == BIT_IOR_EXPR
3150 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3151 ranges[i].in_p = !ranges[i].in_p;
3154 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3155 for (i = 0; i < length; i++)
3156 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3157 break;
3159 /* Try to merge ranges. */
3160 for (first = i; i < length; i++)
3162 tree low = ranges[i].low;
3163 tree high = ranges[i].high;
3164 int in_p = ranges[i].in_p;
3165 bool strict_overflow_p = ranges[i].strict_overflow_p;
3166 int update_fail_count = 0;
3168 for (j = i + 1; j < length; j++)
3170 if (ranges[i].exp != ranges[j].exp)
3171 break;
3172 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3173 ranges[j].in_p, ranges[j].low, ranges[j].high))
3174 break;
3175 strict_overflow_p |= ranges[j].strict_overflow_p;
3178 if (j == i + 1)
3179 continue;
3181 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3182 opcode, ops, ranges[i].exp, NULL, in_p,
3183 low, high, strict_overflow_p))
3185 i = j - 1;
3186 any_changes = true;
3188 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3189 while update_range_test would fail. */
3190 else if (update_fail_count == 64)
3191 i = j - 1;
3192 else
3193 ++update_fail_count;
3196 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3197 ops, ranges);
3199 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3200 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3201 ops, ranges);
3202 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3203 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3204 ops, ranges);
3205 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3206 ranges);
3208 if (any_changes && opcode != ERROR_MARK)
3210 j = 0;
3211 FOR_EACH_VEC_ELT (*ops, i, oe)
3213 if (oe->op == error_mark_node)
3214 continue;
3215 else if (i != j)
3216 (*ops)[j] = oe;
3217 j++;
3219 ops->truncate (j);
3222 XDELETEVEC (ranges);
3223 return any_changes;
3226 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3227 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3228 otherwise the comparison code. */
3230 static tree_code
3231 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3233 if (TREE_CODE (var) != SSA_NAME)
3234 return ERROR_MARK;
3236 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3237 if (stmt == NULL)
3238 return ERROR_MARK;
3240 /* ??? If we start creating more COND_EXPR, we could perform
3241 this same optimization with them. For now, simplify. */
3242 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3243 return ERROR_MARK;
3245 tree cond = gimple_assign_rhs1 (stmt);
3246 tree_code cmp = TREE_CODE (cond);
3247 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3248 return ERROR_MARK;
3250 /* ??? For now, allow only canonical true and false result vectors.
3251 We could expand this to other constants should the need arise,
3252 but at the moment we don't create them. */
3253 tree t = gimple_assign_rhs2 (stmt);
3254 tree f = gimple_assign_rhs3 (stmt);
3255 bool inv;
3256 if (integer_all_onesp (t))
3257 inv = false;
3258 else if (integer_all_onesp (f))
3260 cmp = invert_tree_comparison (cmp, false);
3261 inv = true;
3263 else
3264 return ERROR_MARK;
3265 if (!integer_zerop (f))
3266 return ERROR_MARK;
3268 /* Success! */
3269 if (rets)
3270 *rets = stmt;
3271 if (reti)
3272 *reti = inv;
3273 return cmp;
3276 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3277 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3279 static bool
3280 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3282 unsigned int length = ops->length (), i, j;
3283 bool any_changes = false;
3285 if (length == 1)
3286 return false;
3288 for (i = 0; i < length; ++i)
3290 tree elt0 = (*ops)[i]->op;
3292 gassign *stmt0;
3293 bool invert;
3294 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3295 if (cmp0 == ERROR_MARK)
3296 continue;
3298 for (j = i + 1; j < length; ++j)
3300 tree &elt1 = (*ops)[j]->op;
3302 gassign *stmt1;
3303 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3304 if (cmp1 == ERROR_MARK)
3305 continue;
3307 tree cond0 = gimple_assign_rhs1 (stmt0);
3308 tree x0 = TREE_OPERAND (cond0, 0);
3309 tree y0 = TREE_OPERAND (cond0, 1);
3311 tree cond1 = gimple_assign_rhs1 (stmt1);
3312 tree x1 = TREE_OPERAND (cond1, 0);
3313 tree y1 = TREE_OPERAND (cond1, 1);
3315 tree comb;
3316 if (opcode == BIT_AND_EXPR)
3317 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3318 else if (opcode == BIT_IOR_EXPR)
3319 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3320 else
3321 gcc_unreachable ();
3322 if (comb == NULL)
3323 continue;
3325 /* Success! */
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3328 fprintf (dump_file, "Transforming ");
3329 print_generic_expr (dump_file, cond0);
3330 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3331 print_generic_expr (dump_file, cond1);
3332 fprintf (dump_file, " into ");
3333 print_generic_expr (dump_file, comb);
3334 fputc ('\n', dump_file);
3337 gimple_assign_set_rhs1 (stmt0, comb);
3338 if (invert)
3339 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3340 *gimple_assign_rhs3_ptr (stmt0));
3341 update_stmt (stmt0);
3343 elt1 = error_mark_node;
3344 any_changes = true;
3348 if (any_changes)
3350 operand_entry *oe;
3351 j = 0;
3352 FOR_EACH_VEC_ELT (*ops, i, oe)
3354 if (oe->op == error_mark_node)
3355 continue;
3356 else if (i != j)
3357 (*ops)[j] = oe;
3358 j++;
3360 ops->truncate (j);
3363 return any_changes;
3366 /* Return true if STMT is a cast like:
3367 <bb N>:
3369 _123 = (int) _234;
3371 <bb M>:
3372 # _345 = PHI <_123(N), 1(...), 1(...)>
3373 where _234 has bool type, _123 has single use and
3374 bb N has a single successor M. This is commonly used in
3375 the last block of a range test.
3377 Also Return true if STMT is tcc_compare like:
3378 <bb N>:
3380 _234 = a_2(D) == 2;
3382 <bb M>:
3383 # _345 = PHI <_234(N), 1(...), 1(...)>
3384 _346 = (int) _345;
3385 where _234 has booltype, single use and
3386 bb N has a single successor M. This is commonly used in
3387 the last block of a range test. */
3389 static bool
3390 final_range_test_p (gimple *stmt)
3392 basic_block bb, rhs_bb, lhs_bb;
3393 edge e;
3394 tree lhs, rhs;
3395 use_operand_p use_p;
3396 gimple *use_stmt;
3398 if (!gimple_assign_cast_p (stmt)
3399 && (!is_gimple_assign (stmt)
3400 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3401 != tcc_comparison)))
3402 return false;
3403 bb = gimple_bb (stmt);
3404 if (!single_succ_p (bb))
3405 return false;
3406 e = single_succ_edge (bb);
3407 if (e->flags & EDGE_COMPLEX)
3408 return false;
3410 lhs = gimple_assign_lhs (stmt);
3411 rhs = gimple_assign_rhs1 (stmt);
3412 if (gimple_assign_cast_p (stmt)
3413 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3414 || TREE_CODE (rhs) != SSA_NAME
3415 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3416 return false;
3418 if (!gimple_assign_cast_p (stmt)
3419 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3420 return false;
3422 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3423 if (!single_imm_use (lhs, &use_p, &use_stmt))
3424 return false;
3426 if (gimple_code (use_stmt) != GIMPLE_PHI
3427 || gimple_bb (use_stmt) != e->dest)
3428 return false;
3430 /* And that the rhs is defined in the same loop. */
3431 if (gimple_assign_cast_p (stmt))
3433 if (TREE_CODE (rhs) != SSA_NAME
3434 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3435 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3436 return false;
3438 else
3440 if (TREE_CODE (lhs) != SSA_NAME
3441 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3442 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3443 return false;
3446 return true;
3449 /* Return true if BB is suitable basic block for inter-bb range test
3450 optimization. If BACKWARD is true, BB should be the only predecessor
3451 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3452 or compared with to find a common basic block to which all conditions
3453 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3454 be the only predecessor of BB. */
3456 static bool
3457 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3458 bool backward)
3460 edge_iterator ei, ei2;
3461 edge e, e2;
3462 gimple *stmt;
3463 gphi_iterator gsi;
3464 bool other_edge_seen = false;
3465 bool is_cond;
3467 if (test_bb == bb)
3468 return false;
3469 /* Check last stmt first. */
3470 stmt = last_stmt (bb);
3471 if (stmt == NULL
3472 || (gimple_code (stmt) != GIMPLE_COND
3473 && (backward || !final_range_test_p (stmt)))
3474 || gimple_visited_p (stmt)
3475 || stmt_could_throw_p (stmt)
3476 || *other_bb == bb)
3477 return false;
3478 is_cond = gimple_code (stmt) == GIMPLE_COND;
3479 if (is_cond)
3481 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3482 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3483 to *OTHER_BB (if not set yet, try to find it out). */
3484 if (EDGE_COUNT (bb->succs) != 2)
3485 return false;
3486 FOR_EACH_EDGE (e, ei, bb->succs)
3488 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3489 return false;
3490 if (e->dest == test_bb)
3492 if (backward)
3493 continue;
3494 else
3495 return false;
3497 if (e->dest == bb)
3498 return false;
3499 if (*other_bb == NULL)
3501 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3502 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3503 return false;
3504 else if (e->dest == e2->dest)
3505 *other_bb = e->dest;
3506 if (*other_bb == NULL)
3507 return false;
3509 if (e->dest == *other_bb)
3510 other_edge_seen = true;
3511 else if (backward)
3512 return false;
3514 if (*other_bb == NULL || !other_edge_seen)
3515 return false;
3517 else if (single_succ (bb) != *other_bb)
3518 return false;
3520 /* Now check all PHIs of *OTHER_BB. */
3521 e = find_edge (bb, *other_bb);
3522 e2 = find_edge (test_bb, *other_bb);
3523 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3525 gphi *phi = gsi.phi ();
3526 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3527 corresponding to BB and TEST_BB predecessor must be the same. */
3528 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3529 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3531 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3532 one of the PHIs should have the lhs of the last stmt in
3533 that block as PHI arg and that PHI should have 0 or 1
3534 corresponding to it in all other range test basic blocks
3535 considered. */
3536 if (!is_cond)
3538 if (gimple_phi_arg_def (phi, e->dest_idx)
3539 == gimple_assign_lhs (stmt)
3540 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3541 || integer_onep (gimple_phi_arg_def (phi,
3542 e2->dest_idx))))
3543 continue;
3545 else
3547 gimple *test_last = last_stmt (test_bb);
3548 if (gimple_code (test_last) != GIMPLE_COND
3549 && gimple_phi_arg_def (phi, e2->dest_idx)
3550 == gimple_assign_lhs (test_last)
3551 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3552 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3553 continue;
3556 return false;
3559 return true;
3562 /* Return true if BB doesn't have side-effects that would disallow
3563 range test optimization, all SSA_NAMEs set in the bb are consumed
3564 in the bb and there are no PHIs. */
3566 static bool
3567 no_side_effect_bb (basic_block bb)
3569 gimple_stmt_iterator gsi;
3570 gimple *last;
3572 if (!gimple_seq_empty_p (phi_nodes (bb)))
3573 return false;
3574 last = last_stmt (bb);
3575 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3577 gimple *stmt = gsi_stmt (gsi);
3578 tree lhs;
3579 imm_use_iterator imm_iter;
3580 use_operand_p use_p;
3582 if (is_gimple_debug (stmt))
3583 continue;
3584 if (gimple_has_side_effects (stmt))
3585 return false;
3586 if (stmt == last)
3587 return true;
3588 if (!is_gimple_assign (stmt))
3589 return false;
3590 lhs = gimple_assign_lhs (stmt);
3591 if (TREE_CODE (lhs) != SSA_NAME)
3592 return false;
3593 if (gimple_assign_rhs_could_trap_p (stmt))
3594 return false;
3595 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3597 gimple *use_stmt = USE_STMT (use_p);
3598 if (is_gimple_debug (use_stmt))
3599 continue;
3600 if (gimple_bb (use_stmt) != bb)
3601 return false;
3604 return false;
3607 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3608 return true and fill in *OPS recursively. */
3610 static bool
3611 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3612 struct loop *loop)
3614 gimple *stmt = SSA_NAME_DEF_STMT (var);
3615 tree rhs[2];
3616 int i;
3618 if (!is_reassociable_op (stmt, code, loop))
3619 return false;
3621 rhs[0] = gimple_assign_rhs1 (stmt);
3622 rhs[1] = gimple_assign_rhs2 (stmt);
3623 gimple_set_visited (stmt, true);
3624 for (i = 0; i < 2; i++)
3625 if (TREE_CODE (rhs[i]) == SSA_NAME
3626 && !get_ops (rhs[i], code, ops, loop)
3627 && has_single_use (rhs[i]))
3629 operand_entry *oe = operand_entry_pool.allocate ();
3631 oe->op = rhs[i];
3632 oe->rank = code;
3633 oe->id = 0;
3634 oe->count = 1;
3635 oe->stmt_to_insert = NULL;
3636 ops->safe_push (oe);
3638 return true;
3641 /* Find the ops that were added by get_ops starting from VAR, see if
3642 they were changed during update_range_test and if yes, create new
3643 stmts. */
3645 static tree
3646 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3647 unsigned int *pidx, struct loop *loop)
3649 gimple *stmt = SSA_NAME_DEF_STMT (var);
3650 tree rhs[4];
3651 int i;
3653 if (!is_reassociable_op (stmt, code, loop))
3654 return NULL;
3656 rhs[0] = gimple_assign_rhs1 (stmt);
3657 rhs[1] = gimple_assign_rhs2 (stmt);
3658 rhs[2] = rhs[0];
3659 rhs[3] = rhs[1];
3660 for (i = 0; i < 2; i++)
3661 if (TREE_CODE (rhs[i]) == SSA_NAME)
3663 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3664 if (rhs[2 + i] == NULL_TREE)
3666 if (has_single_use (rhs[i]))
3667 rhs[2 + i] = ops[(*pidx)++]->op;
3668 else
3669 rhs[2 + i] = rhs[i];
3672 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3673 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3675 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3676 var = make_ssa_name (TREE_TYPE (var));
3677 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3678 rhs[2], rhs[3]);
3679 gimple_set_uid (g, gimple_uid (stmt));
3680 gimple_set_visited (g, true);
3681 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3683 return var;
3686 /* Structure to track the initial value passed to get_ops and
3687 the range in the ops vector for each basic block. */
3689 struct inter_bb_range_test_entry
3691 tree op;
3692 unsigned int first_idx, last_idx;
3695 /* Inter-bb range test optimization.
3697 Returns TRUE if a gimple conditional is optimized to a true/false,
3698 otherwise return FALSE.
3700 This indicates to the caller that it should run a CFG cleanup pass
3701 once reassociation is completed. */
3703 static bool
3704 maybe_optimize_range_tests (gimple *stmt)
3706 basic_block first_bb = gimple_bb (stmt);
3707 basic_block last_bb = first_bb;
3708 basic_block other_bb = NULL;
3709 basic_block bb;
3710 edge_iterator ei;
3711 edge e;
3712 auto_vec<operand_entry *> ops;
3713 auto_vec<inter_bb_range_test_entry> bbinfo;
3714 bool any_changes = false;
3715 bool cfg_cleanup_needed = false;
3717 /* Consider only basic blocks that end with GIMPLE_COND or
3718 a cast statement satisfying final_range_test_p. All
3719 but the last bb in the first_bb .. last_bb range
3720 should end with GIMPLE_COND. */
3721 if (gimple_code (stmt) == GIMPLE_COND)
3723 if (EDGE_COUNT (first_bb->succs) != 2)
3724 return cfg_cleanup_needed;
3726 else if (final_range_test_p (stmt))
3727 other_bb = single_succ (first_bb);
3728 else
3729 return cfg_cleanup_needed;
3731 if (stmt_could_throw_p (stmt))
3732 return cfg_cleanup_needed;
3734 /* As relative ordering of post-dominator sons isn't fixed,
3735 maybe_optimize_range_tests can be called first on any
3736 bb in the range we want to optimize. So, start searching
3737 backwards, if first_bb can be set to a predecessor. */
3738 while (single_pred_p (first_bb))
3740 basic_block pred_bb = single_pred (first_bb);
3741 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3742 break;
3743 if (!no_side_effect_bb (first_bb))
3744 break;
3745 first_bb = pred_bb;
3747 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3748 Before starting forward search in last_bb successors, find
3749 out the other_bb. */
3750 if (first_bb == last_bb)
3752 other_bb = NULL;
3753 /* As non-GIMPLE_COND last stmt always terminates the range,
3754 if forward search didn't discover anything, just give up. */
3755 if (gimple_code (stmt) != GIMPLE_COND)
3756 return cfg_cleanup_needed;
3757 /* Look at both successors. Either it ends with a GIMPLE_COND
3758 and satisfies suitable_cond_bb, or ends with a cast and
3759 other_bb is that cast's successor. */
3760 FOR_EACH_EDGE (e, ei, first_bb->succs)
3761 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3762 || e->dest == first_bb)
3763 return cfg_cleanup_needed;
3764 else if (single_pred_p (e->dest))
3766 stmt = last_stmt (e->dest);
3767 if (stmt
3768 && gimple_code (stmt) == GIMPLE_COND
3769 && EDGE_COUNT (e->dest->succs) == 2)
3771 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3772 break;
3773 else
3774 other_bb = NULL;
3776 else if (stmt
3777 && final_range_test_p (stmt)
3778 && find_edge (first_bb, single_succ (e->dest)))
3780 other_bb = single_succ (e->dest);
3781 if (other_bb == first_bb)
3782 other_bb = NULL;
3785 if (other_bb == NULL)
3786 return cfg_cleanup_needed;
3788 /* Now do the forward search, moving last_bb to successor bbs
3789 that aren't other_bb. */
3790 while (EDGE_COUNT (last_bb->succs) == 2)
3792 FOR_EACH_EDGE (e, ei, last_bb->succs)
3793 if (e->dest != other_bb)
3794 break;
3795 if (e == NULL)
3796 break;
3797 if (!single_pred_p (e->dest))
3798 break;
3799 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3800 break;
3801 if (!no_side_effect_bb (e->dest))
3802 break;
3803 last_bb = e->dest;
3805 if (first_bb == last_bb)
3806 return cfg_cleanup_needed;
3807 /* Here basic blocks first_bb through last_bb's predecessor
3808 end with GIMPLE_COND, all of them have one of the edges to
3809 other_bb and another to another block in the range,
3810 all blocks except first_bb don't have side-effects and
3811 last_bb ends with either GIMPLE_COND, or cast satisfying
3812 final_range_test_p. */
3813 for (bb = last_bb; ; bb = single_pred (bb))
3815 enum tree_code code;
3816 tree lhs, rhs;
3817 inter_bb_range_test_entry bb_ent;
3819 bb_ent.op = NULL_TREE;
3820 bb_ent.first_idx = ops.length ();
3821 bb_ent.last_idx = bb_ent.first_idx;
3822 e = find_edge (bb, other_bb);
3823 stmt = last_stmt (bb);
3824 gimple_set_visited (stmt, true);
3825 if (gimple_code (stmt) != GIMPLE_COND)
3827 use_operand_p use_p;
3828 gimple *phi;
3829 edge e2;
3830 unsigned int d;
3832 lhs = gimple_assign_lhs (stmt);
3833 rhs = gimple_assign_rhs1 (stmt);
3834 gcc_assert (bb == last_bb);
3836 /* stmt is
3837 _123 = (int) _234;
3839 _234 = a_2(D) == 2;
3841 followed by:
3842 <bb M>:
3843 # _345 = PHI <_123(N), 1(...), 1(...)>
3845 or 0 instead of 1. If it is 0, the _234
3846 range test is anded together with all the
3847 other range tests, if it is 1, it is ored with
3848 them. */
3849 single_imm_use (lhs, &use_p, &phi);
3850 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3851 e2 = find_edge (first_bb, other_bb);
3852 d = e2->dest_idx;
3853 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3854 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3855 code = BIT_AND_EXPR;
3856 else
3858 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3859 code = BIT_IOR_EXPR;
3862 /* If _234 SSA_NAME_DEF_STMT is
3863 _234 = _567 | _789;
3864 (or &, corresponding to 1/0 in the phi arguments,
3865 push into ops the individual range test arguments
3866 of the bitwise or resp. and, recursively. */
3867 if (TREE_CODE (rhs) == SSA_NAME
3868 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3869 != tcc_comparison)
3870 && !get_ops (rhs, code, &ops,
3871 loop_containing_stmt (stmt))
3872 && has_single_use (rhs))
3874 /* Otherwise, push the _234 range test itself. */
3875 operand_entry *oe = operand_entry_pool.allocate ();
3877 oe->op = rhs;
3878 oe->rank = code;
3879 oe->id = 0;
3880 oe->count = 1;
3881 oe->stmt_to_insert = NULL;
3882 ops.safe_push (oe);
3883 bb_ent.last_idx++;
3884 bb_ent.op = rhs;
3886 else if (is_gimple_assign (stmt)
3887 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3888 == tcc_comparison)
3889 && !get_ops (lhs, code, &ops,
3890 loop_containing_stmt (stmt))
3891 && has_single_use (lhs))
3893 operand_entry *oe = operand_entry_pool.allocate ();
3894 oe->op = lhs;
3895 oe->rank = code;
3896 oe->id = 0;
3897 oe->count = 1;
3898 ops.safe_push (oe);
3899 bb_ent.last_idx++;
3900 bb_ent.op = lhs;
3902 else
3904 bb_ent.last_idx = ops.length ();
3905 bb_ent.op = rhs;
3907 bbinfo.safe_push (bb_ent);
3908 continue;
3910 /* Otherwise stmt is GIMPLE_COND. */
3911 code = gimple_cond_code (stmt);
3912 lhs = gimple_cond_lhs (stmt);
3913 rhs = gimple_cond_rhs (stmt);
3914 if (TREE_CODE (lhs) == SSA_NAME
3915 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3916 && ((code != EQ_EXPR && code != NE_EXPR)
3917 || rhs != boolean_false_node
3918 /* Either push into ops the individual bitwise
3919 or resp. and operands, depending on which
3920 edge is other_bb. */
3921 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3922 ^ (code == EQ_EXPR))
3923 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3924 loop_containing_stmt (stmt))))
3926 /* Or push the GIMPLE_COND stmt itself. */
3927 operand_entry *oe = operand_entry_pool.allocate ();
3929 oe->op = NULL;
3930 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3931 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3932 /* oe->op = NULL signs that there is no SSA_NAME
3933 for the range test, and oe->id instead is the
3934 basic block number, at which's end the GIMPLE_COND
3935 is. */
3936 oe->id = bb->index;
3937 oe->count = 1;
3938 oe->stmt_to_insert = NULL;
3939 ops.safe_push (oe);
3940 bb_ent.op = NULL;
3941 bb_ent.last_idx++;
3943 else if (ops.length () > bb_ent.first_idx)
3945 bb_ent.op = lhs;
3946 bb_ent.last_idx = ops.length ();
3948 bbinfo.safe_push (bb_ent);
3949 if (bb == first_bb)
3950 break;
3952 if (ops.length () > 1)
3953 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3954 if (any_changes)
3956 unsigned int idx, max_idx = 0;
3957 /* update_ops relies on has_single_use predicates returning the
3958 same values as it did during get_ops earlier. Additionally it
3959 never removes statements, only adds new ones and it should walk
3960 from the single imm use and check the predicate already before
3961 making those changes.
3962 On the other side, the handling of GIMPLE_COND directly can turn
3963 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3964 it needs to be done in a separate loop afterwards. */
3965 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3967 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3968 && bbinfo[idx].op != NULL_TREE)
3970 tree new_op;
3972 max_idx = idx;
3973 stmt = last_stmt (bb);
3974 new_op = update_ops (bbinfo[idx].op,
3975 (enum tree_code)
3976 ops[bbinfo[idx].first_idx]->rank,
3977 ops, &bbinfo[idx].first_idx,
3978 loop_containing_stmt (stmt));
3979 if (new_op == NULL_TREE)
3981 gcc_assert (bb == last_bb);
3982 new_op = ops[bbinfo[idx].first_idx++]->op;
3984 if (bbinfo[idx].op != new_op)
3986 imm_use_iterator iter;
3987 use_operand_p use_p;
3988 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3991 if (is_gimple_debug (use_stmt))
3992 continue;
3993 else if (gimple_code (use_stmt) == GIMPLE_COND
3994 || gimple_code (use_stmt) == GIMPLE_PHI)
3995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3996 SET_USE (use_p, new_op);
3997 else if ((is_gimple_assign (use_stmt)
3998 && (TREE_CODE_CLASS
3999 (gimple_assign_rhs_code (use_stmt))
4000 == tcc_comparison)))
4001 cast_or_tcc_cmp_stmt = use_stmt;
4002 else if (gimple_assign_cast_p (use_stmt))
4003 cast_or_tcc_cmp_stmt = use_stmt;
4004 else
4005 gcc_unreachable ();
4007 if (cast_or_tcc_cmp_stmt)
4009 gcc_assert (bb == last_bb);
4010 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4011 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4012 enum tree_code rhs_code
4013 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4014 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4015 : CONVERT_EXPR;
4016 gassign *g;
4017 if (is_gimple_min_invariant (new_op))
4019 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4020 g = gimple_build_assign (new_lhs, new_op);
4022 else
4023 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4024 gimple_stmt_iterator gsi
4025 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4026 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4027 gimple_set_visited (g, true);
4028 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4029 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4030 if (is_gimple_debug (use_stmt))
4031 continue;
4032 else if (gimple_code (use_stmt) == GIMPLE_COND
4033 || gimple_code (use_stmt) == GIMPLE_PHI)
4034 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4035 SET_USE (use_p, new_lhs);
4036 else
4037 gcc_unreachable ();
4041 if (bb == first_bb)
4042 break;
4044 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4046 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4047 && bbinfo[idx].op == NULL_TREE
4048 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4050 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4052 if (idx > max_idx)
4053 max_idx = idx;
4055 /* If we collapse the conditional to a true/false
4056 condition, then bubble that knowledge up to our caller. */
4057 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4059 gimple_cond_make_false (cond_stmt);
4060 cfg_cleanup_needed = true;
4062 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4064 gimple_cond_make_true (cond_stmt);
4065 cfg_cleanup_needed = true;
4067 else
4069 gimple_cond_set_code (cond_stmt, NE_EXPR);
4070 gimple_cond_set_lhs (cond_stmt,
4071 ops[bbinfo[idx].first_idx]->op);
4072 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4074 update_stmt (cond_stmt);
4076 if (bb == first_bb)
4077 break;
4080 /* The above changes could result in basic blocks after the first
4081 modified one, up to and including last_bb, to be executed even if
4082 they would not be in the original program. If the value ranges of
4083 assignment lhs' in those bbs were dependent on the conditions
4084 guarding those basic blocks which now can change, the VRs might
4085 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4086 are only used within the same bb, it should be not a big deal if
4087 we just reset all the VRs in those bbs. See PR68671. */
4088 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4089 reset_flow_sensitive_info_in_bb (bb);
4091 return cfg_cleanup_needed;
4094 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4095 of STMT in it's operands. This is also known as a "destructive
4096 update" operation. */
4098 static bool
4099 is_phi_for_stmt (gimple *stmt, tree operand)
4101 gimple *def_stmt;
4102 gphi *def_phi;
4103 tree lhs;
4104 use_operand_p arg_p;
4105 ssa_op_iter i;
4107 if (TREE_CODE (operand) != SSA_NAME)
4108 return false;
4110 lhs = gimple_assign_lhs (stmt);
4112 def_stmt = SSA_NAME_DEF_STMT (operand);
4113 def_phi = dyn_cast <gphi *> (def_stmt);
4114 if (!def_phi)
4115 return false;
4117 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4118 if (lhs == USE_FROM_PTR (arg_p))
4119 return true;
4120 return false;
4123 /* Remove def stmt of VAR if VAR has zero uses and recurse
4124 on rhs1 operand if so. */
4126 static void
4127 remove_visited_stmt_chain (tree var)
4129 gimple *stmt;
4130 gimple_stmt_iterator gsi;
4132 while (1)
4134 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4135 return;
4136 stmt = SSA_NAME_DEF_STMT (var);
4137 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4139 var = gimple_assign_rhs1 (stmt);
4140 gsi = gsi_for_stmt (stmt);
4141 reassoc_remove_stmt (&gsi);
4142 release_defs (stmt);
4144 else
4145 return;
4149 /* This function checks three consequtive operands in
4150 passed operands vector OPS starting from OPINDEX and
4151 swaps two operands if it is profitable for binary operation
4152 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4154 We pair ops with the same rank if possible.
4156 The alternative we try is to see if STMT is a destructive
4157 update style statement, which is like:
4158 b = phi (a, ...)
4159 a = c + b;
4160 In that case, we want to use the destructive update form to
4161 expose the possible vectorizer sum reduction opportunity.
4162 In that case, the third operand will be the phi node. This
4163 check is not performed if STMT is null.
4165 We could, of course, try to be better as noted above, and do a
4166 lot of work to try to find these opportunities in >3 operand
4167 cases, but it is unlikely to be worth it. */
4169 static void
4170 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4171 unsigned int opindex, gimple *stmt)
4173 operand_entry *oe1, *oe2, *oe3;
4175 oe1 = ops[opindex];
4176 oe2 = ops[opindex + 1];
4177 oe3 = ops[opindex + 2];
4179 if ((oe1->rank == oe2->rank
4180 && oe2->rank != oe3->rank)
4181 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4182 && !is_phi_for_stmt (stmt, oe1->op)
4183 && !is_phi_for_stmt (stmt, oe2->op)))
4184 std::swap (*oe1, *oe3);
4185 else if ((oe1->rank == oe3->rank
4186 && oe2->rank != oe3->rank)
4187 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4188 && !is_phi_for_stmt (stmt, oe1->op)
4189 && !is_phi_for_stmt (stmt, oe3->op)))
4190 std::swap (*oe1, *oe2);
4193 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4194 two definitions, otherwise return STMT. */
4196 static inline gimple *
4197 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4199 if (TREE_CODE (rhs1) == SSA_NAME
4200 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4201 stmt = SSA_NAME_DEF_STMT (rhs1);
4202 if (TREE_CODE (rhs2) == SSA_NAME
4203 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4204 stmt = SSA_NAME_DEF_STMT (rhs2);
4205 return stmt;
4208 /* If the stmt that defines operand has to be inserted, insert it
4209 before the use. */
4210 static void
4211 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4213 gcc_assert (is_gimple_assign (stmt_to_insert));
4214 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4215 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4216 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4217 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4218 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4220 /* If the insert point is not stmt, then insert_point would be
4221 the point where operand rhs1 or rhs2 is defined. In this case,
4222 stmt_to_insert has to be inserted afterwards. This would
4223 only happen when the stmt insertion point is flexible. */
4224 if (stmt == insert_point)
4225 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4226 else
4227 insert_stmt_after (stmt_to_insert, insert_point);
4231 /* Recursively rewrite our linearized statements so that the operators
4232 match those in OPS[OPINDEX], putting the computation in rank
4233 order. Return new lhs.
4234 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4235 the current stmt and during recursive invocations.
4236 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4237 recursive invocations. */
4239 static tree
4240 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4241 vec<operand_entry *> ops, bool changed, bool next_changed)
4243 tree rhs1 = gimple_assign_rhs1 (stmt);
4244 tree rhs2 = gimple_assign_rhs2 (stmt);
4245 tree lhs = gimple_assign_lhs (stmt);
4246 operand_entry *oe;
4248 /* The final recursion case for this function is that you have
4249 exactly two operations left.
4250 If we had exactly one op in the entire list to start with, we
4251 would have never called this function, and the tail recursion
4252 rewrites them one at a time. */
4253 if (opindex + 2 == ops.length ())
4255 operand_entry *oe1, *oe2;
4257 oe1 = ops[opindex];
4258 oe2 = ops[opindex + 1];
4260 if (rhs1 != oe1->op || rhs2 != oe2->op)
4262 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4263 unsigned int uid = gimple_uid (stmt);
4265 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "Transforming ");
4268 print_gimple_stmt (dump_file, stmt, 0);
4271 /* If the stmt that defines operand has to be inserted, insert it
4272 before the use. */
4273 if (oe1->stmt_to_insert)
4274 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4275 if (oe2->stmt_to_insert)
4276 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4277 /* Even when changed is false, reassociation could have e.g. removed
4278 some redundant operations, so unless we are just swapping the
4279 arguments or unless there is no change at all (then we just
4280 return lhs), force creation of a new SSA_NAME. */
4281 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4283 gimple *insert_point
4284 = find_insert_point (stmt, oe1->op, oe2->op);
4285 lhs = make_ssa_name (TREE_TYPE (lhs));
4286 stmt
4287 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4288 oe1->op, oe2->op);
4289 gimple_set_uid (stmt, uid);
4290 gimple_set_visited (stmt, true);
4291 if (insert_point == gsi_stmt (gsi))
4292 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4293 else
4294 insert_stmt_after (stmt, insert_point);
4296 else
4298 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4299 == stmt);
4300 gimple_assign_set_rhs1 (stmt, oe1->op);
4301 gimple_assign_set_rhs2 (stmt, oe2->op);
4302 update_stmt (stmt);
4305 if (rhs1 != oe1->op && rhs1 != oe2->op)
4306 remove_visited_stmt_chain (rhs1);
4308 if (dump_file && (dump_flags & TDF_DETAILS))
4310 fprintf (dump_file, " into ");
4311 print_gimple_stmt (dump_file, stmt, 0);
4314 return lhs;
4317 /* If we hit here, we should have 3 or more ops left. */
4318 gcc_assert (opindex + 2 < ops.length ());
4320 /* Rewrite the next operator. */
4321 oe = ops[opindex];
4323 /* If the stmt that defines operand has to be inserted, insert it
4324 before the use. */
4325 if (oe->stmt_to_insert)
4326 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4328 /* Recurse on the LHS of the binary operator, which is guaranteed to
4329 be the non-leaf side. */
4330 tree new_rhs1
4331 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4332 changed || oe->op != rhs2 || next_changed,
4333 false);
4335 if (oe->op != rhs2 || new_rhs1 != rhs1)
4337 if (dump_file && (dump_flags & TDF_DETAILS))
4339 fprintf (dump_file, "Transforming ");
4340 print_gimple_stmt (dump_file, stmt, 0);
4343 /* If changed is false, this is either opindex == 0
4344 or all outer rhs2's were equal to corresponding oe->op,
4345 and powi_result is NULL.
4346 That means lhs is equivalent before and after reassociation.
4347 Otherwise ensure the old lhs SSA_NAME is not reused and
4348 create a new stmt as well, so that any debug stmts will be
4349 properly adjusted. */
4350 if (changed)
4352 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4353 unsigned int uid = gimple_uid (stmt);
4354 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4356 lhs = make_ssa_name (TREE_TYPE (lhs));
4357 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4358 new_rhs1, oe->op);
4359 gimple_set_uid (stmt, uid);
4360 gimple_set_visited (stmt, true);
4361 if (insert_point == gsi_stmt (gsi))
4362 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4363 else
4364 insert_stmt_after (stmt, insert_point);
4366 else
4368 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4369 == stmt);
4370 gimple_assign_set_rhs1 (stmt, new_rhs1);
4371 gimple_assign_set_rhs2 (stmt, oe->op);
4372 update_stmt (stmt);
4375 if (dump_file && (dump_flags & TDF_DETAILS))
4377 fprintf (dump_file, " into ");
4378 print_gimple_stmt (dump_file, stmt, 0);
4381 return lhs;
4384 /* Find out how many cycles we need to compute statements chain.
4385 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4386 maximum number of independent statements we may execute per cycle. */
4388 static int
4389 get_required_cycles (int ops_num, int cpu_width)
4391 int res;
4392 int elog;
4393 unsigned int rest;
4395 /* While we have more than 2 * cpu_width operands
4396 we may reduce number of operands by cpu_width
4397 per cycle. */
4398 res = ops_num / (2 * cpu_width);
4400 /* Remained operands count may be reduced twice per cycle
4401 until we have only one operand. */
4402 rest = (unsigned)(ops_num - res * cpu_width);
4403 elog = exact_log2 (rest);
4404 if (elog >= 0)
4405 res += elog;
4406 else
4407 res += floor_log2 (rest) + 1;
4409 return res;
4412 /* Returns an optimal number of registers to use for computation of
4413 given statements. */
4415 static int
4416 get_reassociation_width (int ops_num, enum tree_code opc,
4417 machine_mode mode)
4419 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4420 int width;
4421 int width_min;
4422 int cycles_best;
4424 if (param_width > 0)
4425 width = param_width;
4426 else
4427 width = targetm.sched.reassociation_width (opc, mode);
4429 if (width == 1)
4430 return width;
4432 /* Get the minimal time required for sequence computation. */
4433 cycles_best = get_required_cycles (ops_num, width);
4435 /* Check if we may use less width and still compute sequence for
4436 the same time. It will allow us to reduce registers usage.
4437 get_required_cycles is monotonically increasing with lower width
4438 so we can perform a binary search for the minimal width that still
4439 results in the optimal cycle count. */
4440 width_min = 1;
4441 while (width > width_min)
4443 int width_mid = (width + width_min) / 2;
4445 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4446 width = width_mid;
4447 else if (width_min < width_mid)
4448 width_min = width_mid;
4449 else
4450 break;
4453 return width;
4456 /* Recursively rewrite our linearized statements so that the operators
4457 match those in OPS[OPINDEX], putting the computation in rank
4458 order and trying to allow operations to be executed in
4459 parallel. */
4461 static void
4462 rewrite_expr_tree_parallel (gassign *stmt, int width,
4463 vec<operand_entry *> ops)
4465 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4466 int op_num = ops.length ();
4467 gcc_assert (op_num > 0);
4468 int stmt_num = op_num - 1;
4469 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4470 int op_index = op_num - 1;
4471 int stmt_index = 0;
4472 int ready_stmts_end = 0;
4473 int i = 0;
4474 gimple *stmt1 = NULL, *stmt2 = NULL;
4475 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4477 /* We start expression rewriting from the top statements.
4478 So, in this loop we create a full list of statements
4479 we will work with. */
4480 stmts[stmt_num - 1] = stmt;
4481 for (i = stmt_num - 2; i >= 0; i--)
4482 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4484 for (i = 0; i < stmt_num; i++)
4486 tree op1, op2;
4488 /* Determine whether we should use results of
4489 already handled statements or not. */
4490 if (ready_stmts_end == 0
4491 && (i - stmt_index >= width || op_index < 1))
4492 ready_stmts_end = i;
4494 /* Now we choose operands for the next statement. Non zero
4495 value in ready_stmts_end means here that we should use
4496 the result of already generated statements as new operand. */
4497 if (ready_stmts_end > 0)
4499 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4500 if (ready_stmts_end > stmt_index)
4501 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4502 else if (op_index >= 0)
4504 operand_entry *oe = ops[op_index--];
4505 stmt2 = oe->stmt_to_insert;
4506 op2 = oe->op;
4508 else
4510 gcc_assert (stmt_index < i);
4511 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4514 if (stmt_index >= ready_stmts_end)
4515 ready_stmts_end = 0;
4517 else
4519 if (op_index > 1)
4520 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4521 operand_entry *oe2 = ops[op_index--];
4522 operand_entry *oe1 = ops[op_index--];
4523 op2 = oe2->op;
4524 stmt2 = oe2->stmt_to_insert;
4525 op1 = oe1->op;
4526 stmt1 = oe1->stmt_to_insert;
4529 /* If we emit the last statement then we should put
4530 operands into the last statement. It will also
4531 break the loop. */
4532 if (op_index < 0 && stmt_index == i)
4533 i = stmt_num - 1;
4535 if (dump_file && (dump_flags & TDF_DETAILS))
4537 fprintf (dump_file, "Transforming ");
4538 print_gimple_stmt (dump_file, stmts[i], 0);
4541 /* If the stmt that defines operand has to be inserted, insert it
4542 before the use. */
4543 if (stmt1)
4544 insert_stmt_before_use (stmts[i], stmt1);
4545 if (stmt2)
4546 insert_stmt_before_use (stmts[i], stmt2);
4547 stmt1 = stmt2 = NULL;
4549 /* We keep original statement only for the last one. All
4550 others are recreated. */
4551 if (i == stmt_num - 1)
4553 gimple_assign_set_rhs1 (stmts[i], op1);
4554 gimple_assign_set_rhs2 (stmts[i], op2);
4555 update_stmt (stmts[i]);
4557 else
4559 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4561 if (dump_file && (dump_flags & TDF_DETAILS))
4563 fprintf (dump_file, " into ");
4564 print_gimple_stmt (dump_file, stmts[i], 0);
4568 remove_visited_stmt_chain (last_rhs1);
4571 /* Transform STMT, which is really (A +B) + (C + D) into the left
4572 linear form, ((A+B)+C)+D.
4573 Recurse on D if necessary. */
4575 static void
4576 linearize_expr (gimple *stmt)
4578 gimple_stmt_iterator gsi;
4579 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4580 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4581 gimple *oldbinrhs = binrhs;
4582 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4583 gimple *newbinrhs = NULL;
4584 struct loop *loop = loop_containing_stmt (stmt);
4585 tree lhs = gimple_assign_lhs (stmt);
4587 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4588 && is_reassociable_op (binrhs, rhscode, loop));
4590 gsi = gsi_for_stmt (stmt);
4592 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4593 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4594 gimple_assign_rhs_code (binrhs),
4595 gimple_assign_lhs (binlhs),
4596 gimple_assign_rhs2 (binrhs));
4597 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4598 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4599 gimple_set_uid (binrhs, gimple_uid (stmt));
4601 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4602 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4604 if (dump_file && (dump_flags & TDF_DETAILS))
4606 fprintf (dump_file, "Linearized: ");
4607 print_gimple_stmt (dump_file, stmt, 0);
4610 reassociate_stats.linearized++;
4611 update_stmt (stmt);
4613 gsi = gsi_for_stmt (oldbinrhs);
4614 reassoc_remove_stmt (&gsi);
4615 release_defs (oldbinrhs);
4617 gimple_set_visited (stmt, true);
4618 gimple_set_visited (binlhs, true);
4619 gimple_set_visited (binrhs, true);
4621 /* Tail recurse on the new rhs if it still needs reassociation. */
4622 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4623 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4624 want to change the algorithm while converting to tuples. */
4625 linearize_expr (stmt);
4628 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4629 it. Otherwise, return NULL. */
4631 static gimple *
4632 get_single_immediate_use (tree lhs)
4634 use_operand_p immuse;
4635 gimple *immusestmt;
4637 if (TREE_CODE (lhs) == SSA_NAME
4638 && single_imm_use (lhs, &immuse, &immusestmt)
4639 && is_gimple_assign (immusestmt))
4640 return immusestmt;
4642 return NULL;
4645 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4646 representing the negated value. Insertions of any necessary
4647 instructions go before GSI.
4648 This function is recursive in that, if you hand it "a_5" as the
4649 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4650 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4652 static tree
4653 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4655 gimple *negatedefstmt = NULL;
4656 tree resultofnegate;
4657 gimple_stmt_iterator gsi;
4658 unsigned int uid;
4660 /* If we are trying to negate a name, defined by an add, negate the
4661 add operands instead. */
4662 if (TREE_CODE (tonegate) == SSA_NAME)
4663 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4664 if (TREE_CODE (tonegate) == SSA_NAME
4665 && is_gimple_assign (negatedefstmt)
4666 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4667 && has_single_use (gimple_assign_lhs (negatedefstmt))
4668 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4670 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4671 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4672 tree lhs = gimple_assign_lhs (negatedefstmt);
4673 gimple *g;
4675 gsi = gsi_for_stmt (negatedefstmt);
4676 rhs1 = negate_value (rhs1, &gsi);
4678 gsi = gsi_for_stmt (negatedefstmt);
4679 rhs2 = negate_value (rhs2, &gsi);
4681 gsi = gsi_for_stmt (negatedefstmt);
4682 lhs = make_ssa_name (TREE_TYPE (lhs));
4683 gimple_set_visited (negatedefstmt, true);
4684 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4685 gimple_set_uid (g, gimple_uid (negatedefstmt));
4686 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4687 return lhs;
4690 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4691 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4692 NULL_TREE, true, GSI_SAME_STMT);
4693 gsi = *gsip;
4694 uid = gimple_uid (gsi_stmt (gsi));
4695 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4697 gimple *stmt = gsi_stmt (gsi);
4698 if (gimple_uid (stmt) != 0)
4699 break;
4700 gimple_set_uid (stmt, uid);
4702 return resultofnegate;
4705 /* Return true if we should break up the subtract in STMT into an add
4706 with negate. This is true when we the subtract operands are really
4707 adds, or the subtract itself is used in an add expression. In
4708 either case, breaking up the subtract into an add with negate
4709 exposes the adds to reassociation. */
4711 static bool
4712 should_break_up_subtract (gimple *stmt)
4714 tree lhs = gimple_assign_lhs (stmt);
4715 tree binlhs = gimple_assign_rhs1 (stmt);
4716 tree binrhs = gimple_assign_rhs2 (stmt);
4717 gimple *immusestmt;
4718 struct loop *loop = loop_containing_stmt (stmt);
4720 if (TREE_CODE (binlhs) == SSA_NAME
4721 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4722 return true;
4724 if (TREE_CODE (binrhs) == SSA_NAME
4725 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4726 return true;
4728 if (TREE_CODE (lhs) == SSA_NAME
4729 && (immusestmt = get_single_immediate_use (lhs))
4730 && is_gimple_assign (immusestmt)
4731 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4732 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4733 && gimple_assign_rhs1 (immusestmt) == lhs)
4734 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4735 return true;
4736 return false;
4739 /* Transform STMT from A - B into A + -B. */
4741 static void
4742 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4744 tree rhs1 = gimple_assign_rhs1 (stmt);
4745 tree rhs2 = gimple_assign_rhs2 (stmt);
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4749 fprintf (dump_file, "Breaking up subtract ");
4750 print_gimple_stmt (dump_file, stmt, 0);
4753 rhs2 = negate_value (rhs2, gsip);
4754 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4755 update_stmt (stmt);
4758 /* Determine whether STMT is a builtin call that raises an SSA name
4759 to an integer power and has only one use. If so, and this is early
4760 reassociation and unsafe math optimizations are permitted, place
4761 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4762 If any of these conditions does not hold, return FALSE. */
4764 static bool
4765 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4767 tree arg1;
4768 REAL_VALUE_TYPE c, cint;
4770 switch (gimple_call_combined_fn (stmt))
4772 CASE_CFN_POW:
4773 if (flag_errno_math)
4774 return false;
4776 *base = gimple_call_arg (stmt, 0);
4777 arg1 = gimple_call_arg (stmt, 1);
4779 if (TREE_CODE (arg1) != REAL_CST)
4780 return false;
4782 c = TREE_REAL_CST (arg1);
4784 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4785 return false;
4787 *exponent = real_to_integer (&c);
4788 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4789 if (!real_identical (&c, &cint))
4790 return false;
4792 break;
4794 CASE_CFN_POWI:
4795 *base = gimple_call_arg (stmt, 0);
4796 arg1 = gimple_call_arg (stmt, 1);
4798 if (!tree_fits_shwi_p (arg1))
4799 return false;
4801 *exponent = tree_to_shwi (arg1);
4802 break;
4804 default:
4805 return false;
4808 /* Expanding negative exponents is generally unproductive, so we don't
4809 complicate matters with those. Exponents of zero and one should
4810 have been handled by expression folding. */
4811 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4812 return false;
4814 return true;
4817 /* Try to derive and add operand entry for OP to *OPS. Return false if
4818 unsuccessful. */
4820 static bool
4821 try_special_add_to_ops (vec<operand_entry *> *ops,
4822 enum tree_code code,
4823 tree op, gimple* def_stmt)
4825 tree base = NULL_TREE;
4826 HOST_WIDE_INT exponent = 0;
4828 if (TREE_CODE (op) != SSA_NAME
4829 || ! has_single_use (op))
4830 return false;
4832 if (code == MULT_EXPR
4833 && reassoc_insert_powi_p
4834 && flag_unsafe_math_optimizations
4835 && is_gimple_call (def_stmt)
4836 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4838 add_repeat_to_ops_vec (ops, base, exponent);
4839 gimple_set_visited (def_stmt, true);
4840 return true;
4842 else if (code == MULT_EXPR
4843 && is_gimple_assign (def_stmt)
4844 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4845 && !HONOR_SNANS (TREE_TYPE (op))
4846 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4847 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4849 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4850 tree cst = build_minus_one_cst (TREE_TYPE (op));
4851 add_to_ops_vec (ops, rhs1);
4852 add_to_ops_vec (ops, cst);
4853 gimple_set_visited (def_stmt, true);
4854 return true;
4857 return false;
4860 /* Recursively linearize a binary expression that is the RHS of STMT.
4861 Place the operands of the expression tree in the vector named OPS. */
4863 static void
4864 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4865 bool is_associative, bool set_visited)
4867 tree binlhs = gimple_assign_rhs1 (stmt);
4868 tree binrhs = gimple_assign_rhs2 (stmt);
4869 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4870 bool binlhsisreassoc = false;
4871 bool binrhsisreassoc = false;
4872 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4873 struct loop *loop = loop_containing_stmt (stmt);
4875 if (set_visited)
4876 gimple_set_visited (stmt, true);
4878 if (TREE_CODE (binlhs) == SSA_NAME)
4880 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4881 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4882 && !stmt_could_throw_p (binlhsdef));
4885 if (TREE_CODE (binrhs) == SSA_NAME)
4887 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4888 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4889 && !stmt_could_throw_p (binrhsdef));
4892 /* If the LHS is not reassociable, but the RHS is, we need to swap
4893 them. If neither is reassociable, there is nothing we can do, so
4894 just put them in the ops vector. If the LHS is reassociable,
4895 linearize it. If both are reassociable, then linearize the RHS
4896 and the LHS. */
4898 if (!binlhsisreassoc)
4900 /* If this is not a associative operation like division, give up. */
4901 if (!is_associative)
4903 add_to_ops_vec (ops, binrhs);
4904 return;
4907 if (!binrhsisreassoc)
4909 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4910 add_to_ops_vec (ops, binrhs);
4912 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4913 add_to_ops_vec (ops, binlhs);
4915 return;
4918 if (dump_file && (dump_flags & TDF_DETAILS))
4920 fprintf (dump_file, "swapping operands of ");
4921 print_gimple_stmt (dump_file, stmt, 0);
4924 swap_ssa_operands (stmt,
4925 gimple_assign_rhs1_ptr (stmt),
4926 gimple_assign_rhs2_ptr (stmt));
4927 update_stmt (stmt);
4929 if (dump_file && (dump_flags & TDF_DETAILS))
4931 fprintf (dump_file, " is now ");
4932 print_gimple_stmt (dump_file, stmt, 0);
4935 /* We want to make it so the lhs is always the reassociative op,
4936 so swap. */
4937 std::swap (binlhs, binrhs);
4939 else if (binrhsisreassoc)
4941 linearize_expr (stmt);
4942 binlhs = gimple_assign_rhs1 (stmt);
4943 binrhs = gimple_assign_rhs2 (stmt);
4946 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4947 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4948 rhscode, loop));
4949 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4950 is_associative, set_visited);
4952 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4953 add_to_ops_vec (ops, binrhs);
4956 /* Repropagate the negates back into subtracts, since no other pass
4957 currently does it. */
4959 static void
4960 repropagate_negates (void)
4962 unsigned int i = 0;
4963 tree negate;
4965 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4967 gimple *user = get_single_immediate_use (negate);
4969 if (!user || !is_gimple_assign (user))
4970 continue;
4972 /* The negate operand can be either operand of a PLUS_EXPR
4973 (it can be the LHS if the RHS is a constant for example).
4975 Force the negate operand to the RHS of the PLUS_EXPR, then
4976 transform the PLUS_EXPR into a MINUS_EXPR. */
4977 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4979 /* If the negated operand appears on the LHS of the
4980 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4981 to force the negated operand to the RHS of the PLUS_EXPR. */
4982 if (gimple_assign_rhs1 (user) == negate)
4984 swap_ssa_operands (user,
4985 gimple_assign_rhs1_ptr (user),
4986 gimple_assign_rhs2_ptr (user));
4989 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4990 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4991 if (gimple_assign_rhs2 (user) == negate)
4993 tree rhs1 = gimple_assign_rhs1 (user);
4994 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4995 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4996 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4997 update_stmt (user);
5000 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5002 if (gimple_assign_rhs1 (user) == negate)
5004 /* We have
5005 x = -a
5006 y = x - b
5007 which we transform into
5008 x = a + b
5009 y = -x .
5010 This pushes down the negate which we possibly can merge
5011 into some other operation, hence insert it into the
5012 plus_negates vector. */
5013 gimple *feed = SSA_NAME_DEF_STMT (negate);
5014 tree a = gimple_assign_rhs1 (feed);
5015 tree b = gimple_assign_rhs2 (user);
5016 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5017 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5018 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5019 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5020 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5021 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5022 user = gsi_stmt (gsi2);
5023 update_stmt (user);
5024 reassoc_remove_stmt (&gsi);
5025 release_defs (feed);
5026 plus_negates.safe_push (gimple_assign_lhs (user));
5028 else
5030 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5031 rid of one operation. */
5032 gimple *feed = SSA_NAME_DEF_STMT (negate);
5033 tree a = gimple_assign_rhs1 (feed);
5034 tree rhs1 = gimple_assign_rhs1 (user);
5035 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5036 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5037 update_stmt (gsi_stmt (gsi));
5043 /* Returns true if OP is of a type for which we can do reassociation.
5044 That is for integral or non-saturating fixed-point types, and for
5045 floating point type when associative-math is enabled. */
5047 static bool
5048 can_reassociate_p (tree op)
5050 tree type = TREE_TYPE (op);
5051 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5052 return false;
5053 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5054 || NON_SAT_FIXED_POINT_TYPE_P (type)
5055 || (flag_associative_math && FLOAT_TYPE_P (type)))
5056 return true;
5057 return false;
5060 /* Break up subtract operations in block BB.
5062 We do this top down because we don't know whether the subtract is
5063 part of a possible chain of reassociation except at the top.
5065 IE given
5066 d = f + g
5067 c = a + e
5068 b = c - d
5069 q = b - r
5070 k = t - q
5072 we want to break up k = t - q, but we won't until we've transformed q
5073 = b - r, which won't be broken up until we transform b = c - d.
5075 En passant, clear the GIMPLE visited flag on every statement
5076 and set UIDs within each basic block. */
5078 static void
5079 break_up_subtract_bb (basic_block bb)
5081 gimple_stmt_iterator gsi;
5082 basic_block son;
5083 unsigned int uid = 1;
5085 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5087 gimple *stmt = gsi_stmt (gsi);
5088 gimple_set_visited (stmt, false);
5089 gimple_set_uid (stmt, uid++);
5091 if (!is_gimple_assign (stmt)
5092 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5093 continue;
5095 /* Look for simple gimple subtract operations. */
5096 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5098 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5099 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5100 continue;
5102 /* Check for a subtract used only in an addition. If this
5103 is the case, transform it into add of a negate for better
5104 reassociation. IE transform C = A-B into C = A + -B if C
5105 is only used in an addition. */
5106 if (should_break_up_subtract (stmt))
5107 break_up_subtract (stmt, &gsi);
5109 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5110 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5111 plus_negates.safe_push (gimple_assign_lhs (stmt));
5113 for (son = first_dom_son (CDI_DOMINATORS, bb);
5114 son;
5115 son = next_dom_son (CDI_DOMINATORS, son))
5116 break_up_subtract_bb (son);
5119 /* Used for repeated factor analysis. */
5120 struct repeat_factor
5122 /* An SSA name that occurs in a multiply chain. */
5123 tree factor;
5125 /* Cached rank of the factor. */
5126 unsigned rank;
5128 /* Number of occurrences of the factor in the chain. */
5129 HOST_WIDE_INT count;
5131 /* An SSA name representing the product of this factor and
5132 all factors appearing later in the repeated factor vector. */
5133 tree repr;
5137 static vec<repeat_factor> repeat_factor_vec;
5139 /* Used for sorting the repeat factor vector. Sort primarily by
5140 ascending occurrence count, secondarily by descending rank. */
5142 static int
5143 compare_repeat_factors (const void *x1, const void *x2)
5145 const repeat_factor *rf1 = (const repeat_factor *) x1;
5146 const repeat_factor *rf2 = (const repeat_factor *) x2;
5148 if (rf1->count != rf2->count)
5149 return rf1->count - rf2->count;
5151 return rf2->rank - rf1->rank;
5154 /* Look for repeated operands in OPS in the multiply tree rooted at
5155 STMT. Replace them with an optimal sequence of multiplies and powi
5156 builtin calls, and remove the used operands from OPS. Return an
5157 SSA name representing the value of the replacement sequence. */
5159 static tree
5160 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5162 unsigned i, j, vec_len;
5163 int ii;
5164 operand_entry *oe;
5165 repeat_factor *rf1, *rf2;
5166 repeat_factor rfnew;
5167 tree result = NULL_TREE;
5168 tree target_ssa, iter_result;
5169 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5170 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5171 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5172 gimple *mul_stmt, *pow_stmt;
5174 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5175 target. */
5176 if (!powi_fndecl)
5177 return NULL_TREE;
5179 /* Allocate the repeated factor vector. */
5180 repeat_factor_vec.create (10);
5182 /* Scan the OPS vector for all SSA names in the product and build
5183 up a vector of occurrence counts for each factor. */
5184 FOR_EACH_VEC_ELT (*ops, i, oe)
5186 if (TREE_CODE (oe->op) == SSA_NAME)
5188 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5190 if (rf1->factor == oe->op)
5192 rf1->count += oe->count;
5193 break;
5197 if (j >= repeat_factor_vec.length ())
5199 rfnew.factor = oe->op;
5200 rfnew.rank = oe->rank;
5201 rfnew.count = oe->count;
5202 rfnew.repr = NULL_TREE;
5203 repeat_factor_vec.safe_push (rfnew);
5208 /* Sort the repeated factor vector by (a) increasing occurrence count,
5209 and (b) decreasing rank. */
5210 repeat_factor_vec.qsort (compare_repeat_factors);
5212 /* It is generally best to combine as many base factors as possible
5213 into a product before applying __builtin_powi to the result.
5214 However, the sort order chosen for the repeated factor vector
5215 allows us to cache partial results for the product of the base
5216 factors for subsequent use. When we already have a cached partial
5217 result from a previous iteration, it is best to make use of it
5218 before looking for another __builtin_pow opportunity.
5220 As an example, consider x * x * y * y * y * z * z * z * z.
5221 We want to first compose the product x * y * z, raise it to the
5222 second power, then multiply this by y * z, and finally multiply
5223 by z. This can be done in 5 multiplies provided we cache y * z
5224 for use in both expressions:
5226 t1 = y * z
5227 t2 = t1 * x
5228 t3 = t2 * t2
5229 t4 = t1 * t3
5230 result = t4 * z
5232 If we instead ignored the cached y * z and first multiplied by
5233 the __builtin_pow opportunity z * z, we would get the inferior:
5235 t1 = y * z
5236 t2 = t1 * x
5237 t3 = t2 * t2
5238 t4 = z * z
5239 t5 = t3 * t4
5240 result = t5 * y */
5242 vec_len = repeat_factor_vec.length ();
5244 /* Repeatedly look for opportunities to create a builtin_powi call. */
5245 while (true)
5247 HOST_WIDE_INT power;
5249 /* First look for the largest cached product of factors from
5250 preceding iterations. If found, create a builtin_powi for
5251 it if the minimum occurrence count for its factors is at
5252 least 2, or just use this cached product as our next
5253 multiplicand if the minimum occurrence count is 1. */
5254 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5256 if (rf1->repr && rf1->count > 0)
5257 break;
5260 if (j < vec_len)
5262 power = rf1->count;
5264 if (power == 1)
5266 iter_result = rf1->repr;
5268 if (dump_file && (dump_flags & TDF_DETAILS))
5270 unsigned elt;
5271 repeat_factor *rf;
5272 fputs ("Multiplying by cached product ", dump_file);
5273 for (elt = j; elt < vec_len; elt++)
5275 rf = &repeat_factor_vec[elt];
5276 print_generic_expr (dump_file, rf->factor);
5277 if (elt < vec_len - 1)
5278 fputs (" * ", dump_file);
5280 fputs ("\n", dump_file);
5283 else
5285 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5286 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5287 build_int_cst (integer_type_node,
5288 power));
5289 gimple_call_set_lhs (pow_stmt, iter_result);
5290 gimple_set_location (pow_stmt, gimple_location (stmt));
5291 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5292 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5294 if (dump_file && (dump_flags & TDF_DETAILS))
5296 unsigned elt;
5297 repeat_factor *rf;
5298 fputs ("Building __builtin_pow call for cached product (",
5299 dump_file);
5300 for (elt = j; elt < vec_len; elt++)
5302 rf = &repeat_factor_vec[elt];
5303 print_generic_expr (dump_file, rf->factor);
5304 if (elt < vec_len - 1)
5305 fputs (" * ", dump_file);
5307 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5308 power);
5312 else
5314 /* Otherwise, find the first factor in the repeated factor
5315 vector whose occurrence count is at least 2. If no such
5316 factor exists, there are no builtin_powi opportunities
5317 remaining. */
5318 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5320 if (rf1->count >= 2)
5321 break;
5324 if (j >= vec_len)
5325 break;
5327 power = rf1->count;
5329 if (dump_file && (dump_flags & TDF_DETAILS))
5331 unsigned elt;
5332 repeat_factor *rf;
5333 fputs ("Building __builtin_pow call for (", dump_file);
5334 for (elt = j; elt < vec_len; elt++)
5336 rf = &repeat_factor_vec[elt];
5337 print_generic_expr (dump_file, rf->factor);
5338 if (elt < vec_len - 1)
5339 fputs (" * ", dump_file);
5341 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5344 reassociate_stats.pows_created++;
5346 /* Visit each element of the vector in reverse order (so that
5347 high-occurrence elements are visited first, and within the
5348 same occurrence count, lower-ranked elements are visited
5349 first). Form a linear product of all elements in this order
5350 whose occurrencce count is at least that of element J.
5351 Record the SSA name representing the product of each element
5352 with all subsequent elements in the vector. */
5353 if (j == vec_len - 1)
5354 rf1->repr = rf1->factor;
5355 else
5357 for (ii = vec_len - 2; ii >= (int)j; ii--)
5359 tree op1, op2;
5361 rf1 = &repeat_factor_vec[ii];
5362 rf2 = &repeat_factor_vec[ii + 1];
5364 /* Init the last factor's representative to be itself. */
5365 if (!rf2->repr)
5366 rf2->repr = rf2->factor;
5368 op1 = rf1->factor;
5369 op2 = rf2->repr;
5371 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5372 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5373 op1, op2);
5374 gimple_set_location (mul_stmt, gimple_location (stmt));
5375 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5376 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5377 rf1->repr = target_ssa;
5379 /* Don't reprocess the multiply we just introduced. */
5380 gimple_set_visited (mul_stmt, true);
5384 /* Form a call to __builtin_powi for the maximum product
5385 just formed, raised to the power obtained earlier. */
5386 rf1 = &repeat_factor_vec[j];
5387 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5388 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5389 build_int_cst (integer_type_node,
5390 power));
5391 gimple_call_set_lhs (pow_stmt, iter_result);
5392 gimple_set_location (pow_stmt, gimple_location (stmt));
5393 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5394 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5397 /* If we previously formed at least one other builtin_powi call,
5398 form the product of this one and those others. */
5399 if (result)
5401 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5402 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5403 result, iter_result);
5404 gimple_set_location (mul_stmt, gimple_location (stmt));
5405 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5406 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5407 gimple_set_visited (mul_stmt, true);
5408 result = new_result;
5410 else
5411 result = iter_result;
5413 /* Decrement the occurrence count of each element in the product
5414 by the count found above, and remove this many copies of each
5415 factor from OPS. */
5416 for (i = j; i < vec_len; i++)
5418 unsigned k = power;
5419 unsigned n;
5421 rf1 = &repeat_factor_vec[i];
5422 rf1->count -= power;
5424 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5426 if (oe->op == rf1->factor)
5428 if (oe->count <= k)
5430 ops->ordered_remove (n);
5431 k -= oe->count;
5433 if (k == 0)
5434 break;
5436 else
5438 oe->count -= k;
5439 break;
5446 /* At this point all elements in the repeated factor vector have a
5447 remaining occurrence count of 0 or 1, and those with a count of 1
5448 don't have cached representatives. Re-sort the ops vector and
5449 clean up. */
5450 ops->qsort (sort_by_operand_rank);
5451 repeat_factor_vec.release ();
5453 /* Return the final product computed herein. Note that there may
5454 still be some elements with single occurrence count left in OPS;
5455 those will be handled by the normal reassociation logic. */
5456 return result;
5459 /* Attempt to optimize
5460 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5461 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5463 static void
5464 attempt_builtin_copysign (vec<operand_entry *> *ops)
5466 operand_entry *oe;
5467 unsigned int i;
5468 unsigned int length = ops->length ();
5469 tree cst = ops->last ()->op;
5471 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5472 return;
5474 FOR_EACH_VEC_ELT (*ops, i, oe)
5476 if (TREE_CODE (oe->op) == SSA_NAME
5477 && has_single_use (oe->op))
5479 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5480 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5482 tree arg0, arg1;
5483 switch (gimple_call_combined_fn (old_call))
5485 CASE_CFN_COPYSIGN:
5486 arg0 = gimple_call_arg (old_call, 0);
5487 arg1 = gimple_call_arg (old_call, 1);
5488 /* The first argument of copysign must be a constant,
5489 otherwise there's nothing to do. */
5490 if (TREE_CODE (arg0) == REAL_CST)
5492 tree type = TREE_TYPE (arg0);
5493 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5494 /* If we couldn't fold to a single constant, skip it.
5495 That happens e.g. for inexact multiplication when
5496 -frounding-math. */
5497 if (mul == NULL_TREE)
5498 break;
5499 /* Instead of adjusting OLD_CALL, let's build a new
5500 call to not leak the LHS and prevent keeping bogus
5501 debug statements. DCE will clean up the old call. */
5502 gcall *new_call;
5503 if (gimple_call_internal_p (old_call))
5504 new_call = gimple_build_call_internal
5505 (IFN_COPYSIGN, 2, mul, arg1);
5506 else
5507 new_call = gimple_build_call
5508 (gimple_call_fndecl (old_call), 2, mul, arg1);
5509 tree lhs = make_ssa_name (type);
5510 gimple_call_set_lhs (new_call, lhs);
5511 gimple_set_location (new_call,
5512 gimple_location (old_call));
5513 insert_stmt_after (new_call, old_call);
5514 /* We've used the constant, get rid of it. */
5515 ops->pop ();
5516 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5517 /* Handle the CST1 < 0 case by negating the result. */
5518 if (cst1_neg)
5520 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5521 gimple *negate_stmt
5522 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5523 insert_stmt_after (negate_stmt, new_call);
5524 oe->op = negrhs;
5526 else
5527 oe->op = lhs;
5528 if (dump_file && (dump_flags & TDF_DETAILS))
5530 fprintf (dump_file, "Optimizing copysign: ");
5531 print_generic_expr (dump_file, cst);
5532 fprintf (dump_file, " * COPYSIGN (");
5533 print_generic_expr (dump_file, arg0);
5534 fprintf (dump_file, ", ");
5535 print_generic_expr (dump_file, arg1);
5536 fprintf (dump_file, ") into %sCOPYSIGN (",
5537 cst1_neg ? "-" : "");
5538 print_generic_expr (dump_file, mul);
5539 fprintf (dump_file, ", ");
5540 print_generic_expr (dump_file, arg1);
5541 fprintf (dump_file, "\n");
5543 return;
5545 break;
5546 default:
5547 break;
5554 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5556 static void
5557 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5559 tree rhs1;
5561 if (dump_file && (dump_flags & TDF_DETAILS))
5563 fprintf (dump_file, "Transforming ");
5564 print_gimple_stmt (dump_file, stmt, 0);
5567 rhs1 = gimple_assign_rhs1 (stmt);
5568 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5569 update_stmt (stmt);
5570 remove_visited_stmt_chain (rhs1);
5572 if (dump_file && (dump_flags & TDF_DETAILS))
5574 fprintf (dump_file, " into ");
5575 print_gimple_stmt (dump_file, stmt, 0);
5579 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5581 static void
5582 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5583 tree rhs1, tree rhs2)
5585 if (dump_file && (dump_flags & TDF_DETAILS))
5587 fprintf (dump_file, "Transforming ");
5588 print_gimple_stmt (dump_file, stmt, 0);
5591 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5592 update_stmt (gsi_stmt (*gsi));
5593 remove_visited_stmt_chain (rhs1);
5595 if (dump_file && (dump_flags & TDF_DETAILS))
5597 fprintf (dump_file, " into ");
5598 print_gimple_stmt (dump_file, stmt, 0);
5602 /* Reassociate expressions in basic block BB and its post-dominator as
5603 children.
5605 Bubble up return status from maybe_optimize_range_tests. */
5607 static bool
5608 reassociate_bb (basic_block bb)
5610 gimple_stmt_iterator gsi;
5611 basic_block son;
5612 gimple *stmt = last_stmt (bb);
5613 bool cfg_cleanup_needed = false;
5615 if (stmt && !gimple_visited_p (stmt))
5616 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5618 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5620 stmt = gsi_stmt (gsi);
5622 if (is_gimple_assign (stmt)
5623 && !stmt_could_throw_p (stmt))
5625 tree lhs, rhs1, rhs2;
5626 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5628 /* If this is not a gimple binary expression, there is
5629 nothing for us to do with it. */
5630 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5631 continue;
5633 /* If this was part of an already processed statement,
5634 we don't need to touch it again. */
5635 if (gimple_visited_p (stmt))
5637 /* This statement might have become dead because of previous
5638 reassociations. */
5639 if (has_zero_uses (gimple_get_lhs (stmt)))
5641 reassoc_remove_stmt (&gsi);
5642 release_defs (stmt);
5643 /* We might end up removing the last stmt above which
5644 places the iterator to the end of the sequence.
5645 Reset it to the last stmt in this case which might
5646 be the end of the sequence as well if we removed
5647 the last statement of the sequence. In which case
5648 we need to bail out. */
5649 if (gsi_end_p (gsi))
5651 gsi = gsi_last_bb (bb);
5652 if (gsi_end_p (gsi))
5653 break;
5656 continue;
5659 lhs = gimple_assign_lhs (stmt);
5660 rhs1 = gimple_assign_rhs1 (stmt);
5661 rhs2 = gimple_assign_rhs2 (stmt);
5663 /* For non-bit or min/max operations we can't associate
5664 all types. Verify that here. */
5665 if (rhs_code != BIT_IOR_EXPR
5666 && rhs_code != BIT_AND_EXPR
5667 && rhs_code != BIT_XOR_EXPR
5668 && rhs_code != MIN_EXPR
5669 && rhs_code != MAX_EXPR
5670 && (!can_reassociate_p (lhs)
5671 || !can_reassociate_p (rhs1)
5672 || !can_reassociate_p (rhs2)))
5673 continue;
5675 if (associative_tree_code (rhs_code))
5677 auto_vec<operand_entry *> ops;
5678 tree powi_result = NULL_TREE;
5679 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5681 /* There may be no immediate uses left by the time we
5682 get here because we may have eliminated them all. */
5683 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5684 continue;
5686 gimple_set_visited (stmt, true);
5687 linearize_expr_tree (&ops, stmt, true, true);
5688 ops.qsort (sort_by_operand_rank);
5689 int orig_len = ops.length ();
5690 optimize_ops_list (rhs_code, &ops);
5691 if (undistribute_ops_list (rhs_code, &ops,
5692 loop_containing_stmt (stmt)))
5694 ops.qsort (sort_by_operand_rank);
5695 optimize_ops_list (rhs_code, &ops);
5698 if (rhs_code == PLUS_EXPR
5699 && transform_add_to_multiply (&ops))
5700 ops.qsort (sort_by_operand_rank);
5702 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5704 if (is_vector)
5705 optimize_vec_cond_expr (rhs_code, &ops);
5706 else
5707 optimize_range_tests (rhs_code, &ops);
5710 if (rhs_code == MULT_EXPR && !is_vector)
5712 attempt_builtin_copysign (&ops);
5714 if (reassoc_insert_powi_p
5715 && flag_unsafe_math_optimizations)
5716 powi_result = attempt_builtin_powi (stmt, &ops);
5719 operand_entry *last;
5720 bool negate_result = false;
5721 if (ops.length () > 1
5722 && rhs_code == MULT_EXPR)
5724 last = ops.last ();
5725 if ((integer_minus_onep (last->op)
5726 || real_minus_onep (last->op))
5727 && !HONOR_SNANS (TREE_TYPE (lhs))
5728 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5729 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5731 ops.pop ();
5732 negate_result = true;
5736 tree new_lhs = lhs;
5737 /* If the operand vector is now empty, all operands were
5738 consumed by the __builtin_powi optimization. */
5739 if (ops.length () == 0)
5740 transform_stmt_to_copy (&gsi, stmt, powi_result);
5741 else if (ops.length () == 1)
5743 tree last_op = ops.last ()->op;
5745 /* If the stmt that defines operand has to be inserted, insert it
5746 before the use. */
5747 if (ops.last ()->stmt_to_insert)
5748 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5749 if (powi_result)
5750 transform_stmt_to_multiply (&gsi, stmt, last_op,
5751 powi_result);
5752 else
5753 transform_stmt_to_copy (&gsi, stmt, last_op);
5755 else
5757 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5758 int ops_num = ops.length ();
5759 int width = get_reassociation_width (ops_num, rhs_code, mode);
5761 if (dump_file && (dump_flags & TDF_DETAILS))
5762 fprintf (dump_file,
5763 "Width = %d was chosen for reassociation\n", width);
5765 if (width > 1
5766 && ops.length () > 3)
5767 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5768 width, ops);
5769 else
5771 /* When there are three operands left, we want
5772 to make sure the ones that get the double
5773 binary op are chosen wisely. */
5774 int len = ops.length ();
5775 if (len >= 3)
5776 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5778 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5779 powi_result != NULL
5780 || negate_result,
5781 len != orig_len);
5784 /* If we combined some repeated factors into a
5785 __builtin_powi call, multiply that result by the
5786 reassociated operands. */
5787 if (powi_result)
5789 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5790 tree type = TREE_TYPE (lhs);
5791 tree target_ssa = make_temp_ssa_name (type, NULL,
5792 "reassocpow");
5793 gimple_set_lhs (lhs_stmt, target_ssa);
5794 update_stmt (lhs_stmt);
5795 if (lhs != new_lhs)
5797 target_ssa = new_lhs;
5798 new_lhs = lhs;
5800 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5801 powi_result, target_ssa);
5802 gimple_set_location (mul_stmt, gimple_location (stmt));
5803 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5804 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5808 if (negate_result)
5810 stmt = SSA_NAME_DEF_STMT (lhs);
5811 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5812 gimple_set_lhs (stmt, tmp);
5813 if (lhs != new_lhs)
5814 tmp = new_lhs;
5815 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5816 tmp);
5817 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5818 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5819 update_stmt (stmt);
5824 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5825 son;
5826 son = next_dom_son (CDI_POST_DOMINATORS, son))
5827 cfg_cleanup_needed |= reassociate_bb (son);
5829 return cfg_cleanup_needed;
5832 /* Add jumps around shifts for range tests turned into bit tests.
5833 For each SSA_NAME VAR we have code like:
5834 VAR = ...; // final stmt of range comparison
5835 // bit test here...;
5836 OTHERVAR = ...; // final stmt of the bit test sequence
5837 RES = VAR | OTHERVAR;
5838 Turn the above into:
5839 VAR = ...;
5840 if (VAR != 0)
5841 goto <l3>;
5842 else
5843 goto <l2>;
5844 <l2>:
5845 // bit test here...;
5846 OTHERVAR = ...;
5847 <l3>:
5848 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5850 static void
5851 branch_fixup (void)
5853 tree var;
5854 unsigned int i;
5856 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5858 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5859 gimple *use_stmt;
5860 use_operand_p use;
5861 bool ok = single_imm_use (var, &use, &use_stmt);
5862 gcc_assert (ok
5863 && is_gimple_assign (use_stmt)
5864 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5865 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5867 basic_block cond_bb = gimple_bb (def_stmt);
5868 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5869 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5871 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5872 gimple *g = gimple_build_cond (NE_EXPR, var,
5873 build_zero_cst (TREE_TYPE (var)),
5874 NULL_TREE, NULL_TREE);
5875 location_t loc = gimple_location (use_stmt);
5876 gimple_set_location (g, loc);
5877 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5879 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5880 etrue->probability = profile_probability::even ();
5881 etrue->count = cond_bb->count.apply_scale (1, 2);
5882 edge efalse = find_edge (cond_bb, then_bb);
5883 efalse->flags = EDGE_FALSE_VALUE;
5884 efalse->probability -= etrue->probability;
5885 efalse->count -= etrue->count;
5886 then_bb->count -= etrue->count;
5888 tree othervar = NULL_TREE;
5889 if (gimple_assign_rhs1 (use_stmt) == var)
5890 othervar = gimple_assign_rhs2 (use_stmt);
5891 else if (gimple_assign_rhs2 (use_stmt) == var)
5892 othervar = gimple_assign_rhs1 (use_stmt);
5893 else
5894 gcc_unreachable ();
5895 tree lhs = gimple_assign_lhs (use_stmt);
5896 gphi *phi = create_phi_node (lhs, merge_bb);
5897 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5898 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5899 gsi = gsi_for_stmt (use_stmt);
5900 gsi_remove (&gsi, true);
5902 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5903 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5905 reassoc_branch_fixups.release ();
5908 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5909 void debug_ops_vector (vec<operand_entry *> ops);
5911 /* Dump the operand entry vector OPS to FILE. */
5913 void
5914 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5916 operand_entry *oe;
5917 unsigned int i;
5919 FOR_EACH_VEC_ELT (ops, i, oe)
5921 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5922 print_generic_expr (file, oe->op);
5923 fprintf (file, "\n");
5927 /* Dump the operand entry vector OPS to STDERR. */
5929 DEBUG_FUNCTION void
5930 debug_ops_vector (vec<operand_entry *> ops)
5932 dump_ops_vector (stderr, ops);
5935 /* Bubble up return status from reassociate_bb. */
5937 static bool
5938 do_reassoc (void)
5940 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5941 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5944 /* Initialize the reassociation pass. */
5946 static void
5947 init_reassoc (void)
5949 int i;
5950 long rank = 2;
5951 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5953 /* Find the loops, so that we can prevent moving calculations in
5954 them. */
5955 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5957 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5959 next_operand_entry_id = 0;
5961 /* Reverse RPO (Reverse Post Order) will give us something where
5962 deeper loops come later. */
5963 pre_and_rev_post_order_compute (NULL, bbs, false);
5964 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5965 operand_rank = new hash_map<tree, long>;
5967 /* Give each default definition a distinct rank. This includes
5968 parameters and the static chain. Walk backwards over all
5969 SSA names so that we get proper rank ordering according
5970 to tree_swap_operands_p. */
5971 for (i = num_ssa_names - 1; i > 0; --i)
5973 tree name = ssa_name (i);
5974 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5975 insert_operand_rank (name, ++rank);
5978 /* Set up rank for each BB */
5979 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5980 bb_rank[bbs[i]] = ++rank << 16;
5982 free (bbs);
5983 calculate_dominance_info (CDI_POST_DOMINATORS);
5984 plus_negates = vNULL;
5987 /* Cleanup after the reassociation pass, and print stats if
5988 requested. */
5990 static void
5991 fini_reassoc (void)
5993 statistics_counter_event (cfun, "Linearized",
5994 reassociate_stats.linearized);
5995 statistics_counter_event (cfun, "Constants eliminated",
5996 reassociate_stats.constants_eliminated);
5997 statistics_counter_event (cfun, "Ops eliminated",
5998 reassociate_stats.ops_eliminated);
5999 statistics_counter_event (cfun, "Statements rewritten",
6000 reassociate_stats.rewritten);
6001 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6002 reassociate_stats.pows_encountered);
6003 statistics_counter_event (cfun, "Built-in powi calls created",
6004 reassociate_stats.pows_created);
6006 delete operand_rank;
6007 operand_entry_pool.release ();
6008 free (bb_rank);
6009 plus_negates.release ();
6010 free_dominance_info (CDI_POST_DOMINATORS);
6011 loop_optimizer_finalize ();
6014 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6015 insertion of __builtin_powi calls.
6017 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6018 optimization of a gimple conditional. Otherwise returns zero. */
6020 static unsigned int
6021 execute_reassoc (bool insert_powi_p)
6023 reassoc_insert_powi_p = insert_powi_p;
6025 init_reassoc ();
6027 bool cfg_cleanup_needed;
6028 cfg_cleanup_needed = do_reassoc ();
6029 repropagate_negates ();
6030 branch_fixup ();
6032 fini_reassoc ();
6033 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6036 namespace {
6038 const pass_data pass_data_reassoc =
6040 GIMPLE_PASS, /* type */
6041 "reassoc", /* name */
6042 OPTGROUP_NONE, /* optinfo_flags */
6043 TV_TREE_REASSOC, /* tv_id */
6044 ( PROP_cfg | PROP_ssa ), /* properties_required */
6045 0, /* properties_provided */
6046 0, /* properties_destroyed */
6047 0, /* todo_flags_start */
6048 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6051 class pass_reassoc : public gimple_opt_pass
6053 public:
6054 pass_reassoc (gcc::context *ctxt)
6055 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6058 /* opt_pass methods: */
6059 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6060 void set_pass_param (unsigned int n, bool param)
6062 gcc_assert (n == 0);
6063 insert_powi_p = param;
6065 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6066 virtual unsigned int execute (function *)
6067 { return execute_reassoc (insert_powi_p); }
6069 private:
6070 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6071 point 3a in the pass header comment. */
6072 bool insert_powi_p;
6073 }; // class pass_reassoc
6075 } // anon namespace
6077 gimple_opt_pass *
6078 make_pass_reassoc (gcc::context *ctxt)
6080 return new pass_reassoc (ctxt);