gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
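/* As a rough illustration of the steps above (a sketch, not taken from a
   testcase): given

     tmp1 = a - b;  tmp2 = c + d;  e = tmp1 + tmp2;

   step 1 rewrites the subtraction as "a + (-b)", step 2 linearizes the
   whole expression into the operand list {a, -b, c, d}, step 3 would
   cancel -b against b if b also appeared in the list, step 4 re-emits
   the additions in rank order, and step 5 turns any remaining
   "x + (-y)" back into "x - y".  */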
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted: the basic block is
219 split right after that SSA_NAME's definition statement and before
220 its only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
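/* For instance (illustrative): an accumulator phi x_1 in the header of a
   single-block innermost loop, whose only use is inside that loop, gets
   rank bb_rank[latch] + PHI_LOOP_BIAS instead of its header's block rank,
   so it is added last when the expression chain is rewritten (see the
   accumulator example in get_rank below).  */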
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e, 0);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied/whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id - oea->id;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if ((oeb->rank - oea->rank == 0)
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
518 versions of removed SSA_NAMEs, so if possible prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
547 else
548 return oeb->id - oea->id;
551 if (oeb->rank != oea->rank)
552 return oeb->rank - oea->rank;
553 else
554 return oeb->id - oea->id;
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is a reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
608 return true;
610 return false;
614 /* Return true if STMT is a nop-conversion. */
616 static bool
617 gimple_nop_conversion_p (gimple *stmt)
619 if (gassign *ass = dyn_cast <gassign *> (stmt))
621 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
622 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
623 TREE_TYPE (gimple_assign_rhs1 (ass))))
624 return true;
626 return false;
629 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
630 operand of that operation. Otherwise, return NULL_TREE. */
632 static tree
633 get_unary_op (tree name, enum tree_code opcode)
635 gimple *stmt = SSA_NAME_DEF_STMT (name);
637 /* Look through nop conversions (sign changes). */
638 if (gimple_nop_conversion_p (stmt)
639 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
640 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
642 if (!is_gimple_assign (stmt))
643 return NULL_TREE;
645 if (gimple_assign_rhs_code (stmt) == opcode)
646 return gimple_assign_rhs1 (stmt);
647 return NULL_TREE;
650 /* Return true if OP1 and OP2 have the same value when cast to either type. */
652 static bool
653 ops_equal_values_p (tree op1, tree op2)
655 if (op1 == op2)
656 return true;
658 tree orig_op1 = op1;
659 if (TREE_CODE (op1) == SSA_NAME)
661 gimple *stmt = SSA_NAME_DEF_STMT (op1);
662 if (gimple_nop_conversion_p (stmt))
664 op1 = gimple_assign_rhs1 (stmt);
665 if (op1 == op2)
666 return true;
670 if (TREE_CODE (op2) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op2);
673 if (gimple_nop_conversion_p (stmt))
675 op2 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2
677 || orig_op1 == op2)
678 return true;
682 return false;
686 /* If CURR and LAST are a pair of ops that OPCODE allows us to
687 eliminate through equivalences, do so, remove them from OPS, and
688 return true. Otherwise, return false. */
690 static bool
691 eliminate_duplicate_pair (enum tree_code opcode,
692 vec<operand_entry *> *ops,
693 bool *all_done,
694 unsigned int i,
695 operand_entry *curr,
696 operand_entry *last)
699 /* If we have two of the same op, and the opcode is &, |, min, or max,
700 we can eliminate one of them.
701 If we have two of the same op, and the opcode is ^, we can
702 eliminate both of them. */
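/* For instance (illustrative only): with MIN_EXPR the adjacent pair
   "a, a" in OPS collapses to a single "a", while with BIT_XOR_EXPR the
   pair folds away entirely; if those were the only two operands, OPS
   becomes just {0}.  */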
704 if (last && last->op == curr->op)
706 switch (opcode)
708 case MAX_EXPR:
709 case MIN_EXPR:
710 case BIT_IOR_EXPR:
711 case BIT_AND_EXPR:
712 if (dump_file && (dump_flags & TDF_DETAILS))
714 fprintf (dump_file, "Equivalence: ");
715 print_generic_expr (dump_file, curr->op, 0);
716 fprintf (dump_file, " [&|minmax] ");
717 print_generic_expr (dump_file, last->op, 0);
718 fprintf (dump_file, " -> ");
719 print_generic_stmt (dump_file, last->op, 0);
722 ops->ordered_remove (i);
723 reassociate_stats.ops_eliminated ++;
725 return true;
727 case BIT_XOR_EXPR:
728 if (dump_file && (dump_flags & TDF_DETAILS))
730 fprintf (dump_file, "Equivalence: ");
731 print_generic_expr (dump_file, curr->op, 0);
732 fprintf (dump_file, " ^ ");
733 print_generic_expr (dump_file, last->op, 0);
734 fprintf (dump_file, " -> nothing\n");
737 reassociate_stats.ops_eliminated += 2;
739 if (ops->length () == 2)
741 ops->truncate (0);
742 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
743 *all_done = true;
745 else
747 ops->ordered_remove (i-1);
748 ops->ordered_remove (i-1);
751 return true;
753 default:
754 break;
757 return false;
760 static vec<tree> plus_negates;
762 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
763 expression, look in OPS for a corresponding positive operation to cancel
764 it out. If we find one, remove the other from OPS, replace
765 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
766 return false. */
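/* A small sketch of the transform (not from a testcase): for a PLUS_EXPR
   chain with operand list {a, t, b} and t defined as t = -b, the pair
   t + b cancels and is replaced by the constant 0 in the list; with
   t = ~b instead, the pair is replaced by -1.  */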
768 static bool
769 eliminate_plus_minus_pair (enum tree_code opcode,
770 vec<operand_entry *> *ops,
771 unsigned int currindex,
772 operand_entry *curr)
774 tree negateop;
775 tree notop;
776 unsigned int i;
777 operand_entry *oe;
779 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
780 return false;
782 negateop = get_unary_op (curr->op, NEGATE_EXPR);
783 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
784 if (negateop == NULL_TREE && notop == NULL_TREE)
785 return false;
787 /* Any non-negated version will have a rank that is one less than
788 the current rank. So once we hit those ranks, if we don't find
789 one, we can stop. */
791 for (i = currindex + 1;
792 ops->iterate (i, &oe)
793 && oe->rank >= curr->rank - 1 ;
794 i++)
796 if (negateop
797 && ops_equal_values_p (oe->op, negateop))
799 if (dump_file && (dump_flags & TDF_DETAILS))
801 fprintf (dump_file, "Equivalence: ");
802 print_generic_expr (dump_file, negateop, 0);
803 fprintf (dump_file, " + -");
804 print_generic_expr (dump_file, oe->op, 0);
805 fprintf (dump_file, " -> 0\n");
808 ops->ordered_remove (i);
809 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
810 ops->ordered_remove (currindex);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 else if (notop
816 && ops_equal_values_p (oe->op, notop))
818 tree op_type = TREE_TYPE (oe->op);
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, notop, 0);
824 fprintf (dump_file, " + ~");
825 print_generic_expr (dump_file, oe->op, 0);
826 fprintf (dump_file, " -> -1\n");
829 ops->ordered_remove (i);
830 add_to_ops_vec (ops, build_all_ones_cst (op_type));
831 ops->ordered_remove (currindex);
832 reassociate_stats.ops_eliminated ++;
834 return true;
838 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
839 save it for later inspection in repropagate_negates(). */
840 if (negateop != NULL_TREE
841 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
842 plus_negates.safe_push (curr->op);
844 return false;
847 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
848 bitwise not expression, look in OPS for a corresponding operand to
849 cancel it out. If we find one, remove the other from OPS, replace
850 OPS[CURRINDEX] with 0, and return true. Otherwise, return
851 false. */
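/* E.g. (illustrative): a BIT_AND_EXPR chain containing both x and ~x
   collapses to the single operand {0}, and a BIT_IOR_EXPR chain
   containing both collapses to {-1}, since the result absorbs every
   other operand.  */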
853 static bool
854 eliminate_not_pairs (enum tree_code opcode,
855 vec<operand_entry *> *ops,
856 unsigned int currindex,
857 operand_entry *curr)
859 tree notop;
860 unsigned int i;
861 operand_entry *oe;
863 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864 || TREE_CODE (curr->op) != SSA_NAME)
865 return false;
867 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868 if (notop == NULL_TREE)
869 return false;
871 /* Any non-not version will have a rank that is one less than
872 the current rank. So once we hit those ranks, if we don't find
873 one, we can stop. */
875 for (i = currindex + 1;
876 ops->iterate (i, &oe)
877 && oe->rank >= curr->rank - 1;
878 i++)
880 if (oe->op == notop)
882 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "Equivalence: ");
885 print_generic_expr (dump_file, notop, 0);
886 if (opcode == BIT_AND_EXPR)
887 fprintf (dump_file, " & ~");
888 else if (opcode == BIT_IOR_EXPR)
889 fprintf (dump_file, " | ~");
890 print_generic_expr (dump_file, oe->op, 0);
891 if (opcode == BIT_AND_EXPR)
892 fprintf (dump_file, " -> 0\n");
893 else if (opcode == BIT_IOR_EXPR)
894 fprintf (dump_file, " -> -1\n");
897 if (opcode == BIT_AND_EXPR)
898 oe->op = build_zero_cst (TREE_TYPE (oe->op));
899 else if (opcode == BIT_IOR_EXPR)
900 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
902 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oe);
905 return true;
909 return false;
912 /* Use constant value that may be present in OPS to try to eliminate
913 operands. Note that this function is only really used when we've
914 eliminated ops for other reasons, or merged constants. Across
915 single statements, fold already does all of this, plus more. There
916 is little point in duplicating logic, so I've only included the
917 identities that I could ever construct testcases to trigger. */
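/* The identities handled below are, roughly: x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0 (when safe for floating point),
   x * 1 -> x, and x [+^-] 0 -> x (again guarded for floating point).  */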
919 static void
920 eliminate_using_constants (enum tree_code opcode,
921 vec<operand_entry *> *ops)
923 operand_entry *oelast = ops->last ();
924 tree type = TREE_TYPE (oelast->op);
926 if (oelast->rank == 0
927 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
929 switch (opcode)
931 case BIT_AND_EXPR:
932 if (integer_zerop (oelast->op))
934 if (ops->length () != 1)
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found & 0, removing all other ops\n");
939 reassociate_stats.ops_eliminated += ops->length () - 1;
941 ops->truncate (0);
942 ops->quick_push (oelast);
943 return;
946 else if (integer_all_onesp (oelast->op))
948 if (ops->length () != 1)
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & -1, removing\n");
952 ops->pop ();
953 reassociate_stats.ops_eliminated++;
956 break;
957 case BIT_IOR_EXPR:
958 if (integer_all_onesp (oelast->op))
960 if (ops->length () != 1)
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "Found | -1, removing all other ops\n");
965 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (0);
968 ops->quick_push (oelast);
969 return;
972 else if (integer_zerop (oelast->op))
974 if (ops->length () != 1)
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | 0, removing\n");
978 ops->pop ();
979 reassociate_stats.ops_eliminated++;
982 break;
983 case MULT_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_NANS (type)
987 && !HONOR_SIGNED_ZEROS (type)
988 && real_zerop (oelast->op)))
990 if (ops->length () != 1)
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found * 0, removing all other ops\n");
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (1);
997 ops->quick_push (oelast);
998 return;
1001 else if (integer_onep (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && !HONOR_SNANS (type)
1004 && real_onep (oelast->op)))
1006 if (ops->length () != 1)
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 1, removing\n");
1010 ops->pop ();
1011 reassociate_stats.ops_eliminated++;
1012 return;
1015 break;
1016 case BIT_XOR_EXPR:
1017 case PLUS_EXPR:
1018 case MINUS_EXPR:
1019 if (integer_zerop (oelast->op)
1020 || (FLOAT_TYPE_P (type)
1021 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022 && fold_real_zero_addition_p (type, oelast->op,
1023 opcode == MINUS_EXPR)))
1025 if (ops->length () != 1)
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "Found [|^+] 0, removing\n");
1029 ops->pop ();
1030 reassociate_stats.ops_eliminated++;
1031 return;
1034 break;
1035 default:
1036 break;
1042 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1043 bool, bool);
1045 /* Structure for tracking and counting operands. */
1046 struct oecount {
1047 int cnt;
1048 int id;
1049 enum tree_code oecode;
1050 tree op;
1054 /* The heap for the oecount hashtable and the sorted list of operands. */
1055 static vec<oecount> cvec;
1058 /* Oecount hashtable helpers. */
1060 struct oecount_hasher : int_hash <int, 0, 1>
1062 static inline hashval_t hash (int);
1063 static inline bool equal (int, int);
1066 /* Hash function for oecount. */
1068 inline hashval_t
1069 oecount_hasher::hash (int p)
1071 const oecount *c = &cvec[p - 42];
1072 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1075 /* Comparison function for oecount. */
1077 inline bool
1078 oecount_hasher::equal (int p1, int p2)
1080 const oecount *c1 = &cvec[p1 - 42];
1081 const oecount *c2 = &cvec[p2 - 42];
1082 return (c1->oecode == c2->oecode
1083 && c1->op == c2->op);
1086 /* Comparison function for qsort sorting oecount elements by count. */
1088 static int
1089 oecount_cmp (const void *p1, const void *p2)
1091 const oecount *c1 = (const oecount *)p1;
1092 const oecount *c2 = (const oecount *)p2;
1093 if (c1->cnt != c2->cnt)
1094 return c1->cnt - c2->cnt;
1095 else
1096 /* If counts are identical, use unique IDs to stabilize qsort. */
1097 return c1->id - c2->id;
1100 /* Return TRUE iff STMT represents a builtin call that raises OP
1101 to some exponent. */
1103 static bool
1104 stmt_is_power_of_op (gimple *stmt, tree op)
1106 if (!is_gimple_call (stmt))
1107 return false;
1109 switch (gimple_call_combined_fn (stmt))
1111 CASE_CFN_POW:
1112 CASE_CFN_POWI:
1113 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1115 default:
1116 return false;
1120 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1121 in place and return the decremented exponent. Assumes that stmt_is_power_of_op
1122 was previously called for STMT and returned TRUE. */
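/* For example (illustrative): a call __builtin_pow (x, 5.0) is rewritten
   in place to __builtin_pow (x, 4.0) and 4 is returned; likewise
   __builtin_powi (x, 3) becomes __builtin_powi (x, 2) and 2 is
   returned.  */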
1124 static HOST_WIDE_INT
1125 decrement_power (gimple *stmt)
1127 REAL_VALUE_TYPE c, cint;
1128 HOST_WIDE_INT power;
1129 tree arg1;
1131 switch (gimple_call_combined_fn (stmt))
1133 CASE_CFN_POW:
1134 arg1 = gimple_call_arg (stmt, 1);
1135 c = TREE_REAL_CST (arg1);
1136 power = real_to_integer (&c) - 1;
1137 real_from_integer (&cint, VOIDmode, power, SIGNED);
1138 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1139 return power;
1141 CASE_CFN_POWI:
1142 arg1 = gimple_call_arg (stmt, 1);
1143 power = TREE_INT_CST_LOW (arg1) - 1;
1144 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1145 return power;
1147 default:
1148 gcc_unreachable ();
1152 /* Give STMT a new SSA name as its LHS, replace all uses of the old
1153 SSA name with the new one, and return the new SSA name. */
1155 static tree
1156 make_new_ssa_for_def (gimple *stmt)
1158 gimple *use_stmt;
1159 use_operand_p use;
1160 imm_use_iterator iter;
1161 tree new_lhs;
1162 tree lhs = gimple_get_lhs (stmt);
1164 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1165 gimple_set_lhs (stmt, new_lhs);
1167 /* Also need to update GIMPLE_DEBUGs. */
1168 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1170 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1171 SET_USE (use, new_lhs);
1172 update_stmt (use_stmt);
1174 return new_lhs;
1177 /* Replace all SSA names defined in STMTS_TO_FIX with new SSA names,
1178 updating their uses. Also do this for the stmt that defines DEF
1179 if *DEF is not OP. */
1181 static void
1182 make_new_ssa_for_all_defs (tree *def, tree op,
1183 vec<gimple *> &stmts_to_fix)
1185 unsigned i;
1186 gimple *stmt;
1188 if (*def != op
1189 && TREE_CODE (*def) == SSA_NAME
1190 && (stmt = SSA_NAME_DEF_STMT (*def))
1191 && gimple_code (stmt) != GIMPLE_NOP)
1192 *def = make_new_ssa_for_def (stmt);
1194 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1195 make_new_ssa_for_def (stmt);
1198 /* Find the single immediate use of STMT's LHS, and replace it
1199 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1200 replace *DEF with OP as well. */
1202 static void
1203 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1205 tree lhs;
1206 gimple *use_stmt;
1207 use_operand_p use;
1208 gimple_stmt_iterator gsi;
1210 if (is_gimple_call (stmt))
1211 lhs = gimple_call_lhs (stmt);
1212 else
1213 lhs = gimple_assign_lhs (stmt);
1215 gcc_assert (has_single_use (lhs));
1216 single_imm_use (lhs, &use, &use_stmt);
1217 if (lhs == *def)
1218 *def = op;
1219 SET_USE (use, op);
1220 if (TREE_CODE (op) != SSA_NAME)
1221 update_stmt (use_stmt);
1222 gsi = gsi_for_stmt (stmt);
1223 unlink_stmt_vdef (stmt);
1224 reassoc_remove_stmt (&gsi);
1225 release_defs (stmt);
1228 /* Walks the linear chain with result *DEF searching for an operation
1229 with operand OP and code OPCODE, removing that from the chain. *DEF
1230 is updated if there is only one operand but no operation left. */
1232 static void
1233 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1235 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1236 /* PR72835 - Record the stmt chain that has to be updated such that
1237 we don't use the same LHS when the values computed are different. */
1238 auto_vec<gimple *, 64> stmts_to_fix;
1242 tree name;
1244 if (opcode == MULT_EXPR)
1246 if (stmt_is_power_of_op (stmt, op))
1248 if (decrement_power (stmt) == 1)
1250 if (stmts_to_fix.length () > 0)
1251 stmts_to_fix.pop ();
1252 propagate_op_to_single_use (op, stmt, def);
1254 break;
1256 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1258 if (gimple_assign_rhs1 (stmt) == op)
1260 tree cst = build_minus_one_cst (TREE_TYPE (op));
1261 if (stmts_to_fix.length () > 0)
1262 stmts_to_fix.pop ();
1263 propagate_op_to_single_use (cst, stmt, def);
1264 break;
1266 else if (integer_minus_onep (op)
1267 || real_minus_onep (op))
1269 gimple_assign_set_rhs_code
1270 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1271 break;
1276 name = gimple_assign_rhs1 (stmt);
1278 /* If this is the operation we look for and one of the operands
1279 is ours, simply propagate the other operand into the stmt's
1280 single use. */
1281 if (gimple_assign_rhs_code (stmt) == opcode
1282 && (name == op
1283 || gimple_assign_rhs2 (stmt) == op))
1285 if (name == op)
1286 name = gimple_assign_rhs2 (stmt);
1287 if (stmts_to_fix.length () > 0)
1288 stmts_to_fix.pop ();
1289 propagate_op_to_single_use (name, stmt, def);
1290 break;
1293 /* We might have a multiply of two __builtin_pow* calls, and
1294 the operand might be hiding in the rightmost one. Likewise
1295 this can happen for a negate. */
1296 if (opcode == MULT_EXPR
1297 && gimple_assign_rhs_code (stmt) == opcode
1298 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1299 && has_single_use (gimple_assign_rhs2 (stmt)))
1301 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1302 if (stmt_is_power_of_op (stmt2, op))
1304 if (decrement_power (stmt2) == 1)
1305 propagate_op_to_single_use (op, stmt2, def);
1306 else
1307 stmts_to_fix.safe_push (stmt2);
1308 break;
1310 else if (is_gimple_assign (stmt2)
1311 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1313 if (gimple_assign_rhs1 (stmt2) == op)
1315 tree cst = build_minus_one_cst (TREE_TYPE (op));
1316 propagate_op_to_single_use (cst, stmt2, def);
1317 break;
1319 else if (integer_minus_onep (op)
1320 || real_minus_onep (op))
1322 stmts_to_fix.safe_push (stmt2);
1323 gimple_assign_set_rhs_code
1324 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1325 break;
1330 /* Continue walking the chain. */
1331 gcc_assert (name != op
1332 && TREE_CODE (name) == SSA_NAME);
1333 stmt = SSA_NAME_DEF_STMT (name);
1334 stmts_to_fix.safe_push (stmt);
1336 while (1);
1338 if (stmts_to_fix.length () > 0)
1339 make_new_ssa_for_all_defs (def, op, stmts_to_fix);
1342 /* Returns true if statement S1 dominates statement S2. Like
1343 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1345 static bool
1346 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1348 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1350 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1351 SSA_NAME. Assume it lives at the beginning of the function and
1352 thus dominates everything. */
1353 if (!bb1 || s1 == s2)
1354 return true;
1356 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1357 if (!bb2)
1358 return false;
1360 if (bb1 == bb2)
1362 /* PHIs in the same basic block are assumed to be
1363 executed all in parallel, if only one stmt is a PHI,
1364 it dominates the other stmt in the same basic block. */
1365 if (gimple_code (s1) == GIMPLE_PHI)
1366 return true;
1368 if (gimple_code (s2) == GIMPLE_PHI)
1369 return false;
1371 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1373 if (gimple_uid (s1) < gimple_uid (s2))
1374 return true;
1376 if (gimple_uid (s1) > gimple_uid (s2))
1377 return false;
1379 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1380 unsigned int uid = gimple_uid (s1);
1381 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1383 gimple *s = gsi_stmt (gsi);
1384 if (gimple_uid (s) != uid)
1385 break;
1386 if (s == s2)
1387 return true;
1390 return false;
1393 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1396 /* Insert STMT after INSERT_POINT. */
1398 static void
1399 insert_stmt_after (gimple *stmt, gimple *insert_point)
1401 gimple_stmt_iterator gsi;
1402 basic_block bb;
1404 if (gimple_code (insert_point) == GIMPLE_PHI)
1405 bb = gimple_bb (insert_point);
1406 else if (!stmt_ends_bb_p (insert_point))
1408 gsi = gsi_for_stmt (insert_point);
1409 gimple_set_uid (stmt, gimple_uid (insert_point));
1410 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1411 return;
1413 else
1414 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1415 thus if it must end a basic block, it should be a call that can
1416 throw, or some assignment that can throw. If it throws, the LHS
1417 of it will not be initialized though, so only valid places using
1418 the SSA_NAME should be dominated by the fallthru edge. */
1419 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1420 gsi = gsi_after_labels (bb);
1421 if (gsi_end_p (gsi))
1423 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1424 gimple_set_uid (stmt,
1425 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1427 else
1428 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1429 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1432 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA name
1433 of type TYPE for the result. Places the statement after the definition of either
1434 OP1 or OP2. Returns the new statement. */
1436 static gimple *
1437 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1439 gimple *op1def = NULL, *op2def = NULL;
1440 gimple_stmt_iterator gsi;
1441 tree op;
1442 gassign *sum;
1444 /* Create the addition statement. */
1445 op = make_ssa_name (type);
1446 sum = gimple_build_assign (op, opcode, op1, op2);
1448 /* Find an insertion place and insert. */
1449 if (TREE_CODE (op1) == SSA_NAME)
1450 op1def = SSA_NAME_DEF_STMT (op1);
1451 if (TREE_CODE (op2) == SSA_NAME)
1452 op2def = SSA_NAME_DEF_STMT (op2);
1453 if ((!op1def || gimple_nop_p (op1def))
1454 && (!op2def || gimple_nop_p (op2def)))
1456 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1457 if (gsi_end_p (gsi))
1459 gimple_stmt_iterator gsi2
1460 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1461 gimple_set_uid (sum,
1462 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1464 else
1465 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1466 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1468 else
1470 gimple *insert_point;
1471 if ((!op1def || gimple_nop_p (op1def))
1472 || (op2def && !gimple_nop_p (op2def)
1473 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1474 insert_point = op2def;
1475 else
1476 insert_point = op1def;
1477 insert_stmt_after (sum, insert_point);
1479 update_stmt (sum);
1481 return sum;
1484 /* Perform un-distribution of divisions and multiplications.
1485 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1486 to (A + B) / X for real X.
1488 The algorithm is organized as follows.
1490 - First we walk the addition chain *OPS looking for summands that
1491 are defined by a multiplication or a real division. This results
1492 in the candidates bitmap with relevant indices into *OPS.
1494 - Second we build the chains of multiplications or divisions for
1495 these candidates, counting the number of occurrences of (operand, code)
1496 pairs in all of the candidates chains.
1498 - Third we sort the (operand, code) pairs by number of occurrence and
1499 process them starting with the pair with the most uses.
1501 * For each such pair we walk the candidates again to build a
1502 second candidate bitmap noting all multiplication/division chains
1503 that have at least one occurrence of (operand, code).
1505 * We build an alternate addition chain only covering these
1506 candidates with one (operand, code) operation removed from their
1507 multiplication/division chain.
1509 * The first candidate gets replaced by the alternate addition chain
1510 multiplied/divided by the operand.
1512 * All candidate chains get disabled for further processing and
1513 processing of (operand, code) pairs continues.
1515 The alternate addition chains built are re-processed by the main
1516 reassociation algorithm which allows optimizing a * x * y + b * y * x
1517 to (a + b) * x * y in one invocation of the reassociation pass. */
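/* A short worked example (a sketch, not from a testcase): for the
   addition chain a*x + b*x + c the two multiplications are the
   candidates and the pair (x, MULT_EXPR) occurs twice, so we build
   t = a + b, rewrite the first candidate to t * x, turn the second
   candidate into the constant 0, and hand the resulting chain
   t*x + 0 + c back to the normal reassociation machinery.  */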
1519 static bool
1520 undistribute_ops_list (enum tree_code opcode,
1521 vec<operand_entry *> *ops, struct loop *loop)
1523 unsigned int length = ops->length ();
1524 operand_entry *oe1;
1525 unsigned i, j;
1526 unsigned nr_candidates, nr_candidates2;
1527 sbitmap_iterator sbi0;
1528 vec<operand_entry *> *subops;
1529 bool changed = false;
1530 int next_oecount_id = 0;
1532 if (length <= 1
1533 || opcode != PLUS_EXPR)
1534 return false;
1536 /* Build a list of candidates to process. */
1537 auto_sbitmap candidates (length);
1538 bitmap_clear (candidates);
1539 nr_candidates = 0;
1540 FOR_EACH_VEC_ELT (*ops, i, oe1)
1542 enum tree_code dcode;
1543 gimple *oe1def;
1545 if (TREE_CODE (oe1->op) != SSA_NAME)
1546 continue;
1547 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1548 if (!is_gimple_assign (oe1def))
1549 continue;
1550 dcode = gimple_assign_rhs_code (oe1def);
1551 if ((dcode != MULT_EXPR
1552 && dcode != RDIV_EXPR)
1553 || !is_reassociable_op (oe1def, dcode, loop))
1554 continue;
1556 bitmap_set_bit (candidates, i);
1557 nr_candidates++;
1560 if (nr_candidates < 2)
1561 return false;
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "searching for un-distribute opportunities ");
1566 print_generic_expr (dump_file,
1567 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1568 fprintf (dump_file, " %d\n", nr_candidates);
1571 /* Build linearized sub-operand lists and the counting table. */
1572 cvec.create (0);
1574 hash_table<oecount_hasher> ctable (15);
1576 /* ??? Macro arguments cannot have multi-argument template types in
1577 them. This typedef is needed to work around that limitation. */
1578 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1579 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1580 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1582 gimple *oedef;
1583 enum tree_code oecode;
1584 unsigned j;
1586 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1587 oecode = gimple_assign_rhs_code (oedef);
1588 linearize_expr_tree (&subops[i], oedef,
1589 associative_tree_code (oecode), false);
1591 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1593 oecount c;
1594 int *slot;
1595 int idx;
1596 c.oecode = oecode;
1597 c.cnt = 1;
1598 c.id = next_oecount_id++;
1599 c.op = oe1->op;
1600 cvec.safe_push (c);
1601 idx = cvec.length () + 41;
1602 slot = ctable.find_slot (idx, INSERT);
1603 if (!*slot)
1605 *slot = idx;
1607 else
1609 cvec.pop ();
1610 cvec[*slot - 42].cnt++;
1615 /* Sort the counting table. */
1616 cvec.qsort (oecount_cmp);
1618 if (dump_file && (dump_flags & TDF_DETAILS))
1620 oecount *c;
1621 fprintf (dump_file, "Candidates:\n");
1622 FOR_EACH_VEC_ELT (cvec, j, c)
1624 fprintf (dump_file, " %u %s: ", c->cnt,
1625 c->oecode == MULT_EXPR
1626 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1627 print_generic_expr (dump_file, c->op, 0);
1628 fprintf (dump_file, "\n");
1632 /* Process the (operand, code) pairs in order of most occurrence. */
1633 auto_sbitmap candidates2 (length);
1634 while (!cvec.is_empty ())
1636 oecount *c = &cvec.last ();
1637 if (c->cnt < 2)
1638 break;
1640 /* Now collect the operands in the outer chain that contain
1641 the common operand in their inner chain. */
1642 bitmap_clear (candidates2);
1643 nr_candidates2 = 0;
1644 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1646 gimple *oedef;
1647 enum tree_code oecode;
1648 unsigned j;
1649 tree op = (*ops)[i]->op;
1651 /* If we already undistributed in this chain, this may be
1652 a constant. */
1653 if (TREE_CODE (op) != SSA_NAME)
1654 continue;
1656 oedef = SSA_NAME_DEF_STMT (op);
1657 oecode = gimple_assign_rhs_code (oedef);
1658 if (oecode != c->oecode)
1659 continue;
1661 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1663 if (oe1->op == c->op)
1665 bitmap_set_bit (candidates2, i);
1666 ++nr_candidates2;
1667 break;
1672 if (nr_candidates2 >= 2)
1674 operand_entry *oe1, *oe2;
1675 gimple *prod;
1676 int first = bitmap_first_set_bit (candidates2);
1678 /* Build the new addition chain. */
1679 oe1 = (*ops)[first];
1680 if (dump_file && (dump_flags & TDF_DETAILS))
1682 fprintf (dump_file, "Building (");
1683 print_generic_expr (dump_file, oe1->op, 0);
1685 zero_one_operation (&oe1->op, c->oecode, c->op);
1686 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1688 gimple *sum;
1689 oe2 = (*ops)[i];
1690 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, " + ");
1693 print_generic_expr (dump_file, oe2->op, 0);
1695 zero_one_operation (&oe2->op, c->oecode, c->op);
1696 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1697 oe1->op, oe2->op, opcode);
1698 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1699 oe2->rank = 0;
1700 oe1->op = gimple_get_lhs (sum);
1703 /* Apply the multiplication/division. */
1704 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1705 oe1->op, c->op, c->oecode);
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1708 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1709 print_generic_expr (dump_file, c->op, 0);
1710 fprintf (dump_file, "\n");
1713 /* Record it in the addition chain and disable further
1714 undistribution with this op. */
1715 oe1->op = gimple_assign_lhs (prod);
1716 oe1->rank = get_rank (oe1->op);
1717 subops[first].release ();
1719 changed = true;
1722 cvec.pop ();
1725 for (i = 0; i < ops->length (); ++i)
1726 subops[i].release ();
1727 free (subops);
1728 cvec.release ();
1730 return changed;
1733 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1734 expression, examine the other OPS to see if any of them are comparisons
1735 of the same values, which we may be able to combine or eliminate.
1736 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
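/* Similarly (an illustrative case), (a <= b) & (a >= b) can be combined
   into (a == b), and (a < b) & (a > b) folds to the constant 0, in which
   case the constant is simply appended to the operand list.  */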
1738 static bool
1739 eliminate_redundant_comparison (enum tree_code opcode,
1740 vec<operand_entry *> *ops,
1741 unsigned int currindex,
1742 operand_entry *curr)
1744 tree op1, op2;
1745 enum tree_code lcode, rcode;
1746 gimple *def1, *def2;
1747 int i;
1748 operand_entry *oe;
1750 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1751 return false;
1753 /* Check that CURR is a comparison. */
1754 if (TREE_CODE (curr->op) != SSA_NAME)
1755 return false;
1756 def1 = SSA_NAME_DEF_STMT (curr->op);
1757 if (!is_gimple_assign (def1))
1758 return false;
1759 lcode = gimple_assign_rhs_code (def1);
1760 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1761 return false;
1762 op1 = gimple_assign_rhs1 (def1);
1763 op2 = gimple_assign_rhs2 (def1);
1765 /* Now look for a similar comparison in the remaining OPS. */
1766 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1768 tree t;
1770 if (TREE_CODE (oe->op) != SSA_NAME)
1771 continue;
1772 def2 = SSA_NAME_DEF_STMT (oe->op);
1773 if (!is_gimple_assign (def2))
1774 continue;
1775 rcode = gimple_assign_rhs_code (def2);
1776 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1777 continue;
1779 /* If we got here, we have a match. See if we can combine the
1780 two comparisons. */
1781 if (opcode == BIT_IOR_EXPR)
1782 t = maybe_fold_or_comparisons (lcode, op1, op2,
1783 rcode, gimple_assign_rhs1 (def2),
1784 gimple_assign_rhs2 (def2));
1785 else
1786 t = maybe_fold_and_comparisons (lcode, op1, op2,
1787 rcode, gimple_assign_rhs1 (def2),
1788 gimple_assign_rhs2 (def2));
1789 if (!t)
1790 continue;
1792 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1793 always give us a boolean_type_node value back. If the original
1794 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1795 we need to convert. */
1796 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1797 t = fold_convert (TREE_TYPE (curr->op), t);
1799 if (TREE_CODE (t) != INTEGER_CST
1800 && !operand_equal_p (t, curr->op, 0))
1802 enum tree_code subcode;
1803 tree newop1, newop2;
1804 if (!COMPARISON_CLASS_P (t))
1805 continue;
1806 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1807 STRIP_USELESS_TYPE_CONVERSION (newop1);
1808 STRIP_USELESS_TYPE_CONVERSION (newop2);
1809 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1810 continue;
1813 if (dump_file && (dump_flags & TDF_DETAILS))
1815 fprintf (dump_file, "Equivalence: ");
1816 print_generic_expr (dump_file, curr->op, 0);
1817 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1818 print_generic_expr (dump_file, oe->op, 0);
1819 fprintf (dump_file, " -> ");
1820 print_generic_expr (dump_file, t, 0);
1821 fprintf (dump_file, "\n");
1824 /* Now we can delete oe, as it has been subsumed by the new combined
1825 expression t. */
1826 ops->ordered_remove (i);
1827 reassociate_stats.ops_eliminated ++;
1829 /* If t is the same as curr->op, we're done. Otherwise we must
1830 replace curr->op with t. A special case is if we got a constant
1831 back, in which case we add it to the end instead of in place of
1832 the current entry. */
1833 if (TREE_CODE (t) == INTEGER_CST)
1835 ops->ordered_remove (currindex);
1836 add_to_ops_vec (ops, t);
1838 else if (!operand_equal_p (t, curr->op, 0))
1840 gimple *sum;
1841 enum tree_code subcode;
1842 tree newop1;
1843 tree newop2;
1844 gcc_assert (COMPARISON_CLASS_P (t));
1845 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1846 STRIP_USELESS_TYPE_CONVERSION (newop1);
1847 STRIP_USELESS_TYPE_CONVERSION (newop2);
1848 gcc_checking_assert (is_gimple_val (newop1)
1849 && is_gimple_val (newop2));
1850 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1851 curr->op = gimple_get_lhs (sum);
1853 return true;
1856 return false;
1860 /* Transform repeated addition of the same value into a multiplication
1861 by a constant. */
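/* E.g. (a sketch): the operand list {x, x, x, y} for a PLUS_EXPR becomes
   {y, t} together with a queued statement t = x * 3 that is inserted
   when the chain is rewritten.  */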
1862 static bool
1863 transform_add_to_multiply (vec<operand_entry *> *ops)
1865 operand_entry *oe;
1866 tree op = NULL_TREE;
1867 int j;
1868 int i, start = -1, end = 0, count = 0;
1869 auto_vec<std::pair <int, int> > indxs;
1870 bool changed = false;
1872 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1873 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1874 || !flag_unsafe_math_optimizations))
1875 return false;
1877 /* Look for repeated operands. */
1878 FOR_EACH_VEC_ELT (*ops, i, oe)
1880 if (start == -1)
1882 count = 1;
1883 op = oe->op;
1884 start = i;
1886 else if (operand_equal_p (oe->op, op, 0))
1888 count++;
1889 end = i;
1891 else
1893 if (count > 1)
1894 indxs.safe_push (std::make_pair (start, end));
1895 count = 1;
1896 op = oe->op;
1897 start = i;
1901 if (count > 1)
1902 indxs.safe_push (std::make_pair (start, end));
1904 for (j = indxs.length () - 1; j >= 0; --j)
1906 /* Convert repeated operand addition to multiplication. */
1907 start = indxs[j].first;
1908 end = indxs[j].second;
1909 op = (*ops)[start]->op;
1910 count = end - start + 1;
1911 for (i = end; i >= start; --i)
1912 ops->unordered_remove (i);
1913 tree tmp = make_ssa_name (TREE_TYPE (op));
1914 tree cst = build_int_cst (integer_type_node, count);
1915 gassign *mul_stmt
1916 = gimple_build_assign (tmp, MULT_EXPR,
1917 op, fold_convert (TREE_TYPE (op), cst));
1918 gimple_set_visited (mul_stmt, true);
1919 add_to_ops_vec (ops, tmp, mul_stmt);
1920 changed = true;
1923 return changed;
1927 /* Perform various identities and other optimizations on the list of
1928 operand entries, stored in OPS. The tree code for the binary
1929 operation between all the operands is OPCODE. */
1931 static void
1932 optimize_ops_list (enum tree_code opcode,
1933 vec<operand_entry *> *ops)
1935 unsigned int length = ops->length ();
1936 unsigned int i;
1937 operand_entry *oe;
1938 operand_entry *oelast = NULL;
1939 bool iterate = false;
1941 if (length == 1)
1942 return;
1944 oelast = ops->last ();
1946 /* If the last two are constants, pop the constants off, merge them
1947 and try the next two. */
1948 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1950 operand_entry *oelm1 = (*ops)[length - 2];
1952 if (oelm1->rank == 0
1953 && is_gimple_min_invariant (oelm1->op)
1954 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1955 TREE_TYPE (oelast->op)))
1957 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1958 oelm1->op, oelast->op);
1960 if (folded && is_gimple_min_invariant (folded))
1962 if (dump_file && (dump_flags & TDF_DETAILS))
1963 fprintf (dump_file, "Merging constants\n");
1965 ops->pop ();
1966 ops->pop ();
1968 add_to_ops_vec (ops, folded);
1969 reassociate_stats.constants_eliminated++;
1971 optimize_ops_list (opcode, ops);
1972 return;
1977 eliminate_using_constants (opcode, ops);
1978 oelast = NULL;
1980 for (i = 0; ops->iterate (i, &oe);)
1982 bool done = false;
1984 if (eliminate_not_pairs (opcode, ops, i, oe))
1985 return;
1986 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1987 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1988 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1990 if (done)
1991 return;
1992 iterate = true;
1993 oelast = NULL;
1994 continue;
1996 oelast = oe;
1997 i++;
2000 length = ops->length ();
2001 oelast = ops->last ();
2003 if (iterate)
2004 optimize_ops_list (opcode, ops);
2007 /* The following functions are subroutines to optimize_range_tests and allow
2008 it to try to change a logical combination of comparisons into a range
2009 test.
2011 For example, both
2012 X == 2 || X == 5 || X == 3 || X == 4
2013 and
2014 X >= 2 && X <= 5
2015 are converted to
2016 (unsigned) (X - 2) <= 3
2018 For more information see comments above fold_test_range in fold-const.c,
2019 this implementation is for GIMPLE. */
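/* At the GIMPLE level the converted test above looks roughly like
   (illustrative, assuming a 32-bit unsigned X):

     _1 = X + 4294967294;
     _2 = _1 <= 3;

   i.e. the subtraction of the low bound is done in the unsigned type so
   that values below 2 wrap around to values greater than 3.  */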
2021 struct range_entry
2023 tree exp;
2024 tree low;
2025 tree high;
2026 bool in_p;
2027 bool strict_overflow_p;
2028 unsigned int idx, next;
2031 /* This is similar to make_range in fold-const.c, but on top of
2032 GIMPLE instead of trees. If EXP is non-NULL, it should be
2033 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
2034 argument should be a GIMPLE_COND. */
2036 static void
2037 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2039 int in_p;
2040 tree low, high;
2041 bool is_bool, strict_overflow_p;
2043 r->exp = NULL_TREE;
2044 r->in_p = false;
2045 r->strict_overflow_p = false;
2046 r->low = NULL_TREE;
2047 r->high = NULL_TREE;
2048 if (exp != NULL_TREE
2049 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2050 return;
2052 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2053 and see if we can refine the range. Some of the cases below may not
2054 happen, but it doesn't seem worth worrying about this. We "continue"
2055 the outer loop when we've changed something; otherwise we "break"
2056 the switch, which will "break" the while. */
2057 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2058 high = low;
2059 in_p = 0;
2060 strict_overflow_p = false;
2061 is_bool = false;
2062 if (exp == NULL_TREE)
2063 is_bool = true;
2064 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2066 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2067 is_bool = true;
2068 else
2069 return;
2071 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2072 is_bool = true;
2074 while (1)
2076 enum tree_code code;
2077 tree arg0, arg1, exp_type;
2078 tree nexp;
2079 location_t loc;
2081 if (exp != NULL_TREE)
2083 if (TREE_CODE (exp) != SSA_NAME
2084 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2085 break;
2087 stmt = SSA_NAME_DEF_STMT (exp);
2088 if (!is_gimple_assign (stmt))
2089 break;
2091 code = gimple_assign_rhs_code (stmt);
2092 arg0 = gimple_assign_rhs1 (stmt);
2093 arg1 = gimple_assign_rhs2 (stmt);
2094 exp_type = TREE_TYPE (exp);
2096 else
2098 code = gimple_cond_code (stmt);
2099 arg0 = gimple_cond_lhs (stmt);
2100 arg1 = gimple_cond_rhs (stmt);
2101 exp_type = boolean_type_node;
2104 if (TREE_CODE (arg0) != SSA_NAME)
2105 break;
2106 loc = gimple_location (stmt);
2107 switch (code)
2109 case BIT_NOT_EXPR:
2110 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2111 /* Ensure the range is either +[-,0], +[0,0],
2112 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2113 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2114 or similar expression of unconditional true or
2115 false, it should not be negated. */
2116 && ((high && integer_zerop (high))
2117 || (low && integer_onep (low))))
2119 in_p = !in_p;
2120 exp = arg0;
2121 continue;
2123 break;
2124 case SSA_NAME:
2125 exp = arg0;
2126 continue;
2127 CASE_CONVERT:
2128 if (is_bool)
2129 goto do_default;
2130 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2132 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2133 is_bool = true;
2134 else
2135 return;
2137 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2138 is_bool = true;
2139 goto do_default;
2140 case EQ_EXPR:
2141 case NE_EXPR:
2142 case LT_EXPR:
2143 case LE_EXPR:
2144 case GE_EXPR:
2145 case GT_EXPR:
2146 is_bool = true;
2147 /* FALLTHRU */
2148 default:
2149 if (!is_bool)
2150 return;
2151 do_default:
2152 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2153 &low, &high, &in_p,
2154 &strict_overflow_p);
2155 if (nexp != NULL_TREE)
2157 exp = nexp;
2158 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2159 continue;
2161 break;
2163 break;
2165 if (is_bool)
2167 r->exp = exp;
2168 r->in_p = in_p;
2169 r->low = low;
2170 r->high = high;
2171 r->strict_overflow_p = strict_overflow_p;
2175 /* Comparison function for qsort. Sort entries
2176 without SSA_NAME exp first, then with SSA_NAMEs sorted
2177 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2178 by increasing ->low and if ->low is the same, by increasing
2179 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2180 maximum. */
2182 static int
2183 range_entry_cmp (const void *a, const void *b)
2185 const struct range_entry *p = (const struct range_entry *) a;
2186 const struct range_entry *q = (const struct range_entry *) b;
2188 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2190 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2192 /* Group range_entries for the same SSA_NAME together. */
2193 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2194 return -1;
2195 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2196 return 1;
2197 /* If ->low is different, NULL low goes first, then by
2198 ascending low. */
2199 if (p->low != NULL_TREE)
2201 if (q->low != NULL_TREE)
2203 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2204 p->low, q->low);
2205 if (tem && integer_onep (tem))
2206 return -1;
2207 tem = fold_binary (GT_EXPR, boolean_type_node,
2208 p->low, q->low);
2209 if (tem && integer_onep (tem))
2210 return 1;
2212 else
2213 return 1;
2215 else if (q->low != NULL_TREE)
2216 return -1;
2217 /* If ->high is different, NULL high goes last, before that by
2218 ascending high. */
2219 if (p->high != NULL_TREE)
2221 if (q->high != NULL_TREE)
2223 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2224 p->high, q->high);
2225 if (tem && integer_onep (tem))
2226 return -1;
2227 tem = fold_binary (GT_EXPR, boolean_type_node,
2228 p->high, q->high);
2229 if (tem && integer_onep (tem))
2230 return 1;
2232 else
2233 return -1;
2235 else if (q->high != NULL_TREE)
2236 return 1;
2237 /* If both ranges are the same, sort below by ascending idx. */
2239 else
2240 return 1;
2242 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2243 return -1;
2245 if (p->idx < q->idx)
2246 return -1;
2247 else
2249 gcc_checking_assert (p->idx > q->idx);
2250 return 1;
2254 /* Helper routine of optimize_range_test.
2255 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2256 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2257 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2258 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2259 an array of COUNT pointers to other ranges. Return
2260 true if the range merge has been successful.
2261 If OPCODE is ERROR_MARK, this is called from within
2262 maybe_optimize_range_tests and is performing inter-bb range optimization.
2263 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2264 oe->rank. */
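/* Sketch of the effect (illustration only): when the caller has merged
   e.g. X == 2 with X == 3 in an || chain, the operand entry for RANGE is
   replaced by a range check equivalent to (unsigned) (X - 2) <= 1, while
   the other merged entries are overwritten below so that they drop out of
   the operand list afterwards.  */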
2266 static bool
2267 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2268 struct range_entry **otherrangep,
2269 unsigned int count, enum tree_code opcode,
2270 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2271 bool in_p, tree low, tree high, bool strict_overflow_p)
2273 operand_entry *oe = (*ops)[range->idx];
2274 tree op = oe->op;
2275 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2276 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2277 location_t loc = gimple_location (stmt);
2278 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2279 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2280 in_p, low, high);
2281 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2282 gimple_stmt_iterator gsi;
2283 unsigned int i, uid;
2285 if (tem == NULL_TREE)
2286 return false;
2288 /* If op is default def SSA_NAME, there is no place to insert the
2289 new comparison. Give up, unless we can use OP itself as the
2290 range test. */
2291 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2293 if (op == range->exp
2294 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2295 || TREE_CODE (optype) == BOOLEAN_TYPE)
2296 && (op == tem
2297 || (TREE_CODE (tem) == EQ_EXPR
2298 && TREE_OPERAND (tem, 0) == op
2299 && integer_onep (TREE_OPERAND (tem, 1))))
2300 && opcode != BIT_IOR_EXPR
2301 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2303 stmt = NULL;
2304 tem = op;
2306 else
2307 return false;
2310 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2311 warning_at (loc, OPT_Wstrict_overflow,
2312 "assuming signed overflow does not occur "
2313 "when simplifying range test");
2315 if (dump_file && (dump_flags & TDF_DETAILS))
2317 struct range_entry *r;
2318 fprintf (dump_file, "Optimizing range tests ");
2319 print_generic_expr (dump_file, range->exp, 0);
2320 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2321 print_generic_expr (dump_file, range->low, 0);
2322 fprintf (dump_file, ", ");
2323 print_generic_expr (dump_file, range->high, 0);
2324 fprintf (dump_file, "]");
2325 for (i = 0; i < count; i++)
2327 if (otherrange)
2328 r = otherrange + i;
2329 else
2330 r = otherrangep[i];
2331 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2332 print_generic_expr (dump_file, r->low, 0);
2333 fprintf (dump_file, ", ");
2334 print_generic_expr (dump_file, r->high, 0);
2335 fprintf (dump_file, "]");
2337 fprintf (dump_file, "\n into ");
2338 print_generic_expr (dump_file, tem, 0);
2339 fprintf (dump_file, "\n");
2342 if (opcode == BIT_IOR_EXPR
2343 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2344 tem = invert_truthvalue_loc (loc, tem);
2346 tem = fold_convert_loc (loc, optype, tem);
2347 if (stmt)
2349 gsi = gsi_for_stmt (stmt);
2350 uid = gimple_uid (stmt);
2352 else
2354 gsi = gsi_none ();
2355 uid = 0;
2357 if (stmt == NULL)
2358 gcc_checking_assert (tem == op);
2359 /* In rare cases range->exp can be equal to lhs of stmt.
2360 In that case we have to insert after the stmt rather than before
2361 it. If stmt is a PHI, insert it at the start of the basic block. */
2362 else if (op != range->exp)
2364 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2365 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2366 GSI_SAME_STMT);
2367 gsi_prev (&gsi);
2369 else if (gimple_code (stmt) != GIMPLE_PHI)
2371 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2372 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2373 GSI_CONTINUE_LINKING);
2375 else
2377 gsi = gsi_after_labels (gimple_bb (stmt));
2378 if (!gsi_end_p (gsi))
2379 uid = gimple_uid (gsi_stmt (gsi));
2380 else
2382 gsi = gsi_start_bb (gimple_bb (stmt));
2383 uid = 1;
2384 while (!gsi_end_p (gsi))
2386 uid = gimple_uid (gsi_stmt (gsi));
2387 gsi_next (&gsi);
2390 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2391 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2392 GSI_SAME_STMT);
2393 if (gsi_end_p (gsi))
2394 gsi = gsi_last_bb (gimple_bb (stmt));
2395 else
2396 gsi_prev (&gsi);
2398 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2399 if (gimple_uid (gsi_stmt (gsi)))
2400 break;
2401 else
2402 gimple_set_uid (gsi_stmt (gsi), uid);
2404 oe->op = tem;
2405 range->exp = exp;
2406 range->low = low;
2407 range->high = high;
2408 range->in_p = in_p;
2409 range->strict_overflow_p = false;
2411 for (i = 0; i < count; i++)
2413 if (otherrange)
2414 range = otherrange + i;
2415 else
2416 range = otherrangep[i];
2417 oe = (*ops)[range->idx];
2418 /* Now change all the other range test immediate uses, so that
2419 those tests will be optimized away. */
2420 if (opcode == ERROR_MARK)
2422 if (oe->op)
2423 oe->op = build_int_cst (TREE_TYPE (oe->op),
2424 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2425 else
2426 oe->op = (oe->rank == BIT_IOR_EXPR
2427 ? boolean_false_node : boolean_true_node);
2429 else
2430 oe->op = error_mark_node;
2431 range->exp = NULL_TREE;
2432 range->low = NULL_TREE;
2433 range->high = NULL_TREE;
2435 return true;
2438 /* Optimize X == CST1 || X == CST2
2439 if popcount (CST1 ^ CST2) == 1 into
2440 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2441 Similarly for ranges. E.g.
2442 X != 2 && X != 3 && X != 10 && X != 11
2443 will be transformed by the previous optimization into
2444 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2445 and this loop can transform that into
2446 !(((X & ~8) - 2U) <= 1U). */
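/* Worked example (values chosen for illustration): for X == 43 || X == 47
   we have lowi ^ lowj == 43 ^ 47 == 4, a power of two, so the pair is
   rewritten as (X & ~4) == 43, which exactly the values 43 and 47
   satisfy.  */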
2448 static bool
2449 optimize_range_tests_xor (enum tree_code opcode, tree type,
2450 tree lowi, tree lowj, tree highi, tree highj,
2451 vec<operand_entry *> *ops,
2452 struct range_entry *rangei,
2453 struct range_entry *rangej)
2455 tree lowxor, highxor, tem, exp;
2456 /* Check lowi ^ lowj == highi ^ highj and
2457 popcount (lowi ^ lowj) == 1. */
2458 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2459 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2460 return false;
2461 if (!integer_pow2p (lowxor))
2462 return false;
2463 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2464 if (!tree_int_cst_equal (lowxor, highxor))
2465 return false;
2467 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2468 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2469 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2470 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2471 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2472 NULL, rangei->in_p, lowj, highj,
2473 rangei->strict_overflow_p
2474 || rangej->strict_overflow_p))
2475 return true;
2476 return false;
2479 /* Optimize X == CST1 || X == CST2
2480 if popcount (CST2 - CST1) == 1 into
2481 ((X - CST1) & ~(CST2 - CST1)) == 0.
2482 Similarly for ranges. E.g.
2483 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2484 || X == 75 || X == 45
2485 will be transformed by the previous optimization into
2486 (X - 43U) <= 3U || (X - 75U) <= 3U
2487 and this loop can transform that into
2488 ((X - 43U) & ~(75U - 43U)) <= 3U. */
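/* Note on the example above (explanatory only): 75U - 43U == 32U has a
   single bit set, so masking that bit off of X - 43U maps the second
   block 75..78 onto the first block 43..46, and a single <= 3U test
   covers both.  */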
2489 static bool
2490 optimize_range_tests_diff (enum tree_code opcode, tree type,
2491 tree lowi, tree lowj, tree highi, tree highj,
2492 vec<operand_entry *> *ops,
2493 struct range_entry *rangei,
2494 struct range_entry *rangej)
2496 tree tem1, tem2, mask;
2497 /* Check highi - lowi == highj - lowj. */
2498 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2499 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2500 return false;
2501 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2502 if (!tree_int_cst_equal (tem1, tem2))
2503 return false;
2504 /* Check popcount (lowj - lowi) == 1. */
2505 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2506 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2507 return false;
2508 if (!integer_pow2p (tem1))
2509 return false;
2511 type = unsigned_type_for (type);
2512 tem1 = fold_convert (type, tem1);
2513 tem2 = fold_convert (type, tem2);
2514 lowi = fold_convert (type, lowi);
2515 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2516 tem1 = fold_binary (MINUS_EXPR, type,
2517 fold_convert (type, rangei->exp), lowi);
2518 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2519 lowj = build_int_cst (type, 0);
2520 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2521 NULL, rangei->in_p, lowj, tem2,
2522 rangei->strict_overflow_p
2523 || rangej->strict_overflow_p))
2524 return true;
2525 return false;
2528 /* Perform some checks common to optimize_range_tests_xor and
2529 optimize_range_tests_diff.
2530 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2531 otherwise call optimize_range_tests_diff. */
2533 static bool
2534 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2535 bool optimize_xor, vec<operand_entry *> *ops,
2536 struct range_entry *ranges)
2538 int i, j;
2539 bool any_changes = false;
2540 for (i = first; i < length; i++)
2542 tree lowi, highi, lowj, highj, type, tem;
2544 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2545 continue;
2546 type = TREE_TYPE (ranges[i].exp);
2547 if (!INTEGRAL_TYPE_P (type))
2548 continue;
2549 lowi = ranges[i].low;
2550 if (lowi == NULL_TREE)
2551 lowi = TYPE_MIN_VALUE (type);
2552 highi = ranges[i].high;
2553 if (highi == NULL_TREE)
2554 continue;
2555 for (j = i + 1; j < length && j < i + 64; j++)
2557 bool changes;
2558 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2559 continue;
2560 lowj = ranges[j].low;
2561 if (lowj == NULL_TREE)
2562 continue;
2563 highj = ranges[j].high;
2564 if (highj == NULL_TREE)
2565 highj = TYPE_MAX_VALUE (type);
2566 /* Check lowj > highi. */
2567 tem = fold_binary (GT_EXPR, boolean_type_node,
2568 lowj, highi);
2569 if (tem == NULL_TREE || !integer_onep (tem))
2570 continue;
2571 if (optimize_xor)
2572 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2573 highi, highj, ops,
2574 ranges + i, ranges + j);
2575 else
2576 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2577 highi, highj, ops,
2578 ranges + i, ranges + j);
2579 if (changes)
2581 any_changes = true;
2582 break;
2586 return any_changes;
2589 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2590 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2591 EXP on success, NULL otherwise. */
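/* Illustration with assumed values matching the bit test example below:
   for LOW == 43, HIGH == 46 and TOTALLOW == 43 the computed mask has the
   four least significant bits set (0xf); for LOW == 75, HIGH == 78 with
   the same TOTALLOW the same four-bit mask is shifted left by
   75 - 43 == 32 positions.  */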
2593 static tree
2594 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2595 wide_int *mask, tree *totallowp)
2597 tree tem = int_const_binop (MINUS_EXPR, high, low);
2598 if (tem == NULL_TREE
2599 || TREE_CODE (tem) != INTEGER_CST
2600 || TREE_OVERFLOW (tem)
2601 || tree_int_cst_sgn (tem) == -1
2602 || compare_tree_int (tem, prec) != -1)
2603 return NULL_TREE;
2605 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2606 *mask = wi::shifted_mask (0, max, false, prec);
2607 if (TREE_CODE (exp) == BIT_AND_EXPR
2608 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2610 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2611 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2612 if (wi::popcount (msk) == 1
2613 && wi::ltu_p (msk, prec - max))
2615 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2616 max += msk.to_uhwi ();
2617 exp = TREE_OPERAND (exp, 0);
2618 if (integer_zerop (low)
2619 && TREE_CODE (exp) == PLUS_EXPR
2620 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2622 tree ret = TREE_OPERAND (exp, 0);
2623 STRIP_NOPS (ret);
2624 widest_int bias
2625 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2626 TYPE_PRECISION (TREE_TYPE (low))));
2627 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2628 if (totallowp)
2630 *totallowp = tbias;
2631 return ret;
2633 else if (!tree_int_cst_lt (totallow, tbias))
2634 return NULL_TREE;
2635 bias = wi::to_widest (tbias);
2636 bias -= wi::to_widest (totallow);
2637 if (bias >= 0 && bias < prec - max)
2639 *mask = wi::lshift (*mask, bias);
2640 return ret;
2645 if (totallowp)
2646 return exp;
2647 if (!tree_int_cst_lt (totallow, low))
2648 return exp;
2649 tem = int_const_binop (MINUS_EXPR, low, totallow);
2650 if (tem == NULL_TREE
2651 || TREE_CODE (tem) != INTEGER_CST
2652 || TREE_OVERFLOW (tem)
2653 || compare_tree_int (tem, prec - max) == 1)
2654 return NULL_TREE;
2656 *mask = wi::lshift (*mask, wi::to_widest (tem));
2657 return exp;
2660 /* Attempt to optimize small range tests using bit test.
2661 E.g.
2662 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2663 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2664 has been optimized by earlier optimizations into:
2665 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2666 As the whole 43 through 82 range spans fewer than 64 values,
2667 for 64-bit word targets optimize that into:
2668 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2670 static bool
2671 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2672 vec<operand_entry *> *ops,
2673 struct range_entry *ranges)
2675 int i, j;
2676 bool any_changes = false;
2677 int prec = GET_MODE_BITSIZE (word_mode);
2678 auto_vec<struct range_entry *, 64> candidates;
2680 for (i = first; i < length - 2; i++)
2682 tree lowi, highi, lowj, highj, type;
2684 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2685 continue;
2686 type = TREE_TYPE (ranges[i].exp);
2687 if (!INTEGRAL_TYPE_P (type))
2688 continue;
2689 lowi = ranges[i].low;
2690 if (lowi == NULL_TREE)
2691 lowi = TYPE_MIN_VALUE (type);
2692 highi = ranges[i].high;
2693 if (highi == NULL_TREE)
2694 continue;
2695 wide_int mask;
2696 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2697 highi, &mask, &lowi);
2698 if (exp == NULL_TREE)
2699 continue;
2700 bool strict_overflow_p = ranges[i].strict_overflow_p;
2701 candidates.truncate (0);
2702 int end = MIN (i + 64, length);
2703 for (j = i + 1; j < end; j++)
2705 tree exp2;
2706 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2707 continue;
2708 if (ranges[j].exp == exp)
2710 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2712 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2713 if (exp2 == exp)
2715 else if (TREE_CODE (exp2) == PLUS_EXPR)
2717 exp2 = TREE_OPERAND (exp2, 0);
2718 STRIP_NOPS (exp2);
2719 if (exp2 != exp)
2720 continue;
2722 else
2723 continue;
2725 else
2726 continue;
2727 lowj = ranges[j].low;
2728 if (lowj == NULL_TREE)
2729 continue;
2730 highj = ranges[j].high;
2731 if (highj == NULL_TREE)
2732 highj = TYPE_MAX_VALUE (type);
2733 wide_int mask2;
2734 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2735 highj, &mask2, NULL);
2736 if (exp2 != exp)
2737 continue;
2738 mask |= mask2;
2739 strict_overflow_p |= ranges[j].strict_overflow_p;
2740 candidates.safe_push (&ranges[j]);
2743 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2744 if (candidates.length () >= 2)
2746 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2747 wi::to_widest (lowi)
2748 + prec - 1 - wi::clz (mask));
2749 operand_entry *oe = (*ops)[ranges[i].idx];
2750 tree op = oe->op;
2751 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2752 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2753 location_t loc = gimple_location (stmt);
2754 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2756 /* See if it isn't cheaper to pretend the minimum value of the
2757 range is 0, if the maximum value is small enough.
2758 We can then avoid the subtraction of the minimum value, but the
2759 mask constant could perhaps be more expensive. */
2760 if (compare_tree_int (lowi, 0) > 0
2761 && compare_tree_int (high, prec) < 0)
2763 int cost_diff;
2764 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2765 rtx reg = gen_raw_REG (word_mode, 10000);
2766 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2767 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2768 GEN_INT (-m)), speed_p);
2769 rtx r = immed_wide_int_const (mask, word_mode);
2770 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2771 word_mode, speed_p);
2772 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2773 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2774 word_mode, speed_p);
2775 if (cost_diff > 0)
2777 mask = wi::lshift (mask, m);
2778 lowi = build_zero_cst (TREE_TYPE (lowi));
2782 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2783 false, lowi, high);
2784 if (tem == NULL_TREE || is_gimple_val (tem))
2785 continue;
2786 tree etype = unsigned_type_for (TREE_TYPE (exp));
2787 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2788 fold_convert_loc (loc, etype, exp),
2789 fold_convert_loc (loc, etype, lowi));
2790 exp = fold_convert_loc (loc, integer_type_node, exp);
2791 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2792 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2793 build_int_cst (word_type, 1), exp);
2794 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2795 wide_int_to_tree (word_type, mask));
2796 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2797 build_zero_cst (word_type));
2798 if (is_gimple_val (exp))
2799 continue;
2801 /* The shift might have undefined behavior if TEM is true,
2802 but reassociate_bb isn't prepared to have basic blocks
2803 split when it is running. So, temporarily emit code
2804 with BIT_IOR_EXPR instead of &&, and fix it up in
2805 branch_fixup. */
2806 gimple_seq seq;
2807 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2808 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2809 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2810 gimple_seq seq2;
2811 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2812 gimple_seq_add_seq_without_update (&seq, seq2);
2813 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2814 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2815 gimple *g = gimple_build_assign (make_ssa_name (optype),
2816 BIT_IOR_EXPR, tem, exp);
2817 gimple_set_location (g, loc);
2818 gimple_seq_add_stmt_without_update (&seq, g);
2819 exp = gimple_assign_lhs (g);
2820 tree val = build_zero_cst (optype);
2821 if (update_range_test (&ranges[i], NULL, candidates.address (),
2822 candidates.length (), opcode, ops, exp,
2823 seq, false, val, val, strict_overflow_p))
2825 any_changes = true;
2826 reassoc_branch_fixups.safe_push (tem);
2828 else
2829 gimple_seq_discard (seq);
2832 return any_changes;
2835 /* Attempt to optimize for signed a and b where b is known to be >= 0:
2836 a >= 0 && a < b into (unsigned) a < (unsigned) b
2837 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
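/* Why the combination is valid (explanatory note): b >= 0 is established
   via get_nonzero_bits below.  If a is negative, (unsigned) a is at least
   2^(prec-1) while (unsigned) b is below it, so the unsigned comparison is
   false, matching the failed a >= 0 test; if a >= 0, both casts preserve
   the values and the comparison matches the signed a < b.  */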
2839 static bool
2840 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
2841 vec<operand_entry *> *ops,
2842 struct range_entry *ranges)
2844 int i;
2845 bool any_changes = false;
2846 hash_map<tree, int> *map = NULL;
2848 for (i = first; i < length; i++)
2850 if (ranges[i].exp == NULL_TREE
2851 || TREE_CODE (ranges[i].exp) != SSA_NAME
2852 || !ranges[i].in_p)
2853 continue;
2855 tree type = TREE_TYPE (ranges[i].exp);
2856 if (!INTEGRAL_TYPE_P (type)
2857 || TYPE_UNSIGNED (type)
2858 || ranges[i].low == NULL_TREE
2859 || !integer_zerop (ranges[i].low)
2860 || ranges[i].high != NULL_TREE)
2861 continue;
2862 /* EXP >= 0 here. */
2863 if (map == NULL)
2864 map = new hash_map <tree, int>;
2865 map->put (ranges[i].exp, i);
2868 if (map == NULL)
2869 return false;
2871 for (i = 0; i < length; i++)
2873 if (ranges[i].low == NULL_TREE
2874 || ranges[i].high == NULL_TREE
2875 || !integer_zerop (ranges[i].low)
2876 || !integer_zerop (ranges[i].high))
2877 continue;
2879 gimple *stmt;
2880 tree_code ccode;
2881 tree rhs1, rhs2;
2882 if (ranges[i].exp)
2884 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
2885 continue;
2886 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
2887 if (!is_gimple_assign (stmt))
2888 continue;
2889 ccode = gimple_assign_rhs_code (stmt);
2890 rhs1 = gimple_assign_rhs1 (stmt);
2891 rhs2 = gimple_assign_rhs2 (stmt);
2893 else
2895 operand_entry *oe = (*ops)[ranges[i].idx];
2896 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2897 if (gimple_code (stmt) != GIMPLE_COND)
2898 continue;
2899 ccode = gimple_cond_code (stmt);
2900 rhs1 = gimple_cond_lhs (stmt);
2901 rhs2 = gimple_cond_rhs (stmt);
2904 if (TREE_CODE (rhs1) != SSA_NAME
2905 || rhs2 == NULL_TREE
2906 || TREE_CODE (rhs2) != SSA_NAME)
2907 continue;
2909 switch (ccode)
2911 case GT_EXPR:
2912 case GE_EXPR:
2913 if (!ranges[i].in_p)
2914 std::swap (rhs1, rhs2);
2915 ccode = swap_tree_comparison (ccode);
2916 break;
2917 case LT_EXPR:
2918 case LE_EXPR:
2919 if (ranges[i].in_p)
2920 std::swap (rhs1, rhs2);
2921 break;
2922 default:
2923 continue;
2926 int *idx = map->get (rhs1);
2927 if (idx == NULL)
2928 continue;
2930 wide_int nz = get_nonzero_bits (rhs2);
2931 if (wi::neg_p (nz))
2932 continue;
2934 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
2935 and RHS2 is known to be RHS2 >= 0. */
2936 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
2938 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2939 if ((ranges[*idx].strict_overflow_p
2940 || ranges[i].strict_overflow_p)
2941 && issue_strict_overflow_warning (wc))
2942 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
2943 "assuming signed overflow does not occur "
2944 "when simplifying range test");
2946 if (dump_file && (dump_flags & TDF_DETAILS))
2948 struct range_entry *r = &ranges[*idx];
2949 fprintf (dump_file, "Optimizing range test ");
2950 print_generic_expr (dump_file, r->exp, 0);
2951 fprintf (dump_file, " +[");
2952 print_generic_expr (dump_file, r->low, 0);
2953 fprintf (dump_file, ", ");
2954 print_generic_expr (dump_file, r->high, 0);
2955 fprintf (dump_file, "] and comparison ");
2956 print_generic_expr (dump_file, rhs1, 0);
2957 fprintf (dump_file, " %s ", op_symbol_code (ccode));
2958 print_generic_expr (dump_file, rhs2, 0);
2959 fprintf (dump_file, "\n into (");
2960 print_generic_expr (dump_file, utype, 0);
2961 fprintf (dump_file, ") ");
2962 print_generic_expr (dump_file, rhs1, 0);
2963 fprintf (dump_file, " %s (", op_symbol_code (ccode));
2964 print_generic_expr (dump_file, utype, 0);
2965 fprintf (dump_file, ") ");
2966 print_generic_expr (dump_file, rhs2, 0);
2967 fprintf (dump_file, "\n");
2970 if (ranges[i].in_p)
2971 std::swap (rhs1, rhs2);
2973 unsigned int uid = gimple_uid (stmt);
2974 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2975 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
2976 gimple_set_uid (g, uid);
2977 rhs1 = gimple_assign_lhs (g);
2978 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2979 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
2980 gimple_set_uid (g, uid);
2981 rhs2 = gimple_assign_lhs (g);
2982 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2983 if (tree_swap_operands_p (rhs1, rhs2))
2985 std::swap (rhs1, rhs2);
2986 ccode = swap_tree_comparison (ccode);
2988 if (gimple_code (stmt) == GIMPLE_COND)
2990 gcond *c = as_a <gcond *> (stmt);
2991 gimple_cond_set_code (c, ccode);
2992 gimple_cond_set_lhs (c, rhs1);
2993 gimple_cond_set_rhs (c, rhs2);
2994 update_stmt (stmt);
2996 else
2998 operand_entry *oe = (*ops)[ranges[i].idx];
2999 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3000 if (!INTEGRAL_TYPE_P (ctype)
3001 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3002 && TYPE_PRECISION (ctype) != 1))
3003 ctype = boolean_type_node;
3004 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3005 gimple_set_uid (g, uid);
3006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3007 if (oe->op && ctype != TREE_TYPE (oe->op))
3009 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3010 NOP_EXPR, gimple_assign_lhs (g));
3011 gimple_set_uid (g, uid);
3012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3014 ranges[i].exp = gimple_assign_lhs (g);
3015 oe->op = ranges[i].exp;
3016 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3017 ranges[i].high = ranges[i].low;
3019 ranges[i].strict_overflow_p = false;
3020 operand_entry *oe = (*ops)[ranges[*idx].idx];
3021 /* Now change all the other range test immediate uses, so that
3022 those tests will be optimized away. */
3023 if (opcode == ERROR_MARK)
3025 if (oe->op)
3026 oe->op = build_int_cst (TREE_TYPE (oe->op),
3027 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3028 else
3029 oe->op = (oe->rank == BIT_IOR_EXPR
3030 ? boolean_false_node : boolean_true_node);
3032 else
3033 oe->op = error_mark_node;
3034 ranges[*idx].exp = NULL_TREE;
3035 ranges[*idx].low = NULL_TREE;
3036 ranges[*idx].high = NULL_TREE;
3037 any_changes = true;
3040 delete map;
3041 return any_changes;
3044 /* Optimize range tests, similarly how fold_range_test optimizes
3045 it on trees. The tree code for the binary
3046 operation between all the operands is OPCODE.
3047 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3048 maybe_optimize_range_tests for inter-bb range optimization.
3049 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3050 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3051 the actual opcode. */
3053 static bool
3054 optimize_range_tests (enum tree_code opcode,
3055 vec<operand_entry *> *ops)
3057 unsigned int length = ops->length (), i, j, first;
3058 operand_entry *oe;
3059 struct range_entry *ranges;
3060 bool any_changes = false;
3062 if (length == 1)
3063 return false;
3065 ranges = XNEWVEC (struct range_entry, length);
3066 for (i = 0; i < length; i++)
3068 oe = (*ops)[i];
3069 ranges[i].idx = i;
3070 init_range_entry (ranges + i, oe->op,
3071 oe->op
3072 ? NULL
3073 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3074 /* For | invert it now, we will invert it again before emitting
3075 the optimized expression. */
3076 if (opcode == BIT_IOR_EXPR
3077 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3078 ranges[i].in_p = !ranges[i].in_p;
3081 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3082 for (i = 0; i < length; i++)
3083 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3084 break;
3086 /* Try to merge ranges. */
3087 for (first = i; i < length; i++)
3089 tree low = ranges[i].low;
3090 tree high = ranges[i].high;
3091 int in_p = ranges[i].in_p;
3092 bool strict_overflow_p = ranges[i].strict_overflow_p;
3093 int update_fail_count = 0;
3095 for (j = i + 1; j < length; j++)
3097 if (ranges[i].exp != ranges[j].exp)
3098 break;
3099 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3100 ranges[j].in_p, ranges[j].low, ranges[j].high))
3101 break;
3102 strict_overflow_p |= ranges[j].strict_overflow_p;
3105 if (j == i + 1)
3106 continue;
3108 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3109 opcode, ops, ranges[i].exp, NULL, in_p,
3110 low, high, strict_overflow_p))
3112 i = j - 1;
3113 any_changes = true;
3115 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3116 while update_range_test would fail. */
3117 else if (update_fail_count == 64)
3118 i = j - 1;
3119 else
3120 ++update_fail_count;
3123 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3124 ops, ranges);
3126 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3127 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3128 ops, ranges);
3129 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3130 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3131 ops, ranges);
3132 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3133 ranges);
3135 if (any_changes && opcode != ERROR_MARK)
3137 j = 0;
3138 FOR_EACH_VEC_ELT (*ops, i, oe)
3140 if (oe->op == error_mark_node)
3141 continue;
3142 else if (i != j)
3143 (*ops)[j] = oe;
3144 j++;
3146 ops->truncate (j);
3149 XDELETEVEC (ranges);
3150 return any_changes;
3153 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3154 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3155 otherwise the comparison code. */
3157 static tree_code
3158 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3160 if (TREE_CODE (var) != SSA_NAME)
3161 return ERROR_MARK;
3163 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3164 if (stmt == NULL)
3165 return ERROR_MARK;
3167 /* ??? If we start creating more COND_EXPR, we could perform
3168 this same optimization with them. For now, simplify. */
3169 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3170 return ERROR_MARK;
3172 tree cond = gimple_assign_rhs1 (stmt);
3173 tree_code cmp = TREE_CODE (cond);
3174 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3175 return ERROR_MARK;
3177 /* ??? For now, allow only canonical true and false result vectors.
3178 We could expand this to other constants should the need arise,
3179 but at the moment we don't create them. */
3180 tree t = gimple_assign_rhs2 (stmt);
3181 tree f = gimple_assign_rhs3 (stmt);
3182 bool inv;
3183 if (integer_all_onesp (t))
3184 inv = false;
3185 else if (integer_all_onesp (f))
3187 cmp = invert_tree_comparison (cmp, false);
3188 inv = true;
3190 else
3191 return ERROR_MARK;
3192 if (!integer_zerop (f))
3193 return ERROR_MARK;
3195 /* Success! */
3196 if (rets)
3197 *rets = stmt;
3198 if (reti)
3199 *reti = inv;
3200 return cmp;
3203 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3204 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3206 static bool
3207 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3209 unsigned int length = ops->length (), i, j;
3210 bool any_changes = false;
3212 if (length == 1)
3213 return false;
3215 for (i = 0; i < length; ++i)
3217 tree elt0 = (*ops)[i]->op;
3219 gassign *stmt0;
3220 bool invert;
3221 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3222 if (cmp0 == ERROR_MARK)
3223 continue;
3225 for (j = i + 1; j < length; ++j)
3227 tree &elt1 = (*ops)[j]->op;
3229 gassign *stmt1;
3230 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3231 if (cmp1 == ERROR_MARK)
3232 continue;
3234 tree cond0 = gimple_assign_rhs1 (stmt0);
3235 tree x0 = TREE_OPERAND (cond0, 0);
3236 tree y0 = TREE_OPERAND (cond0, 1);
3238 tree cond1 = gimple_assign_rhs1 (stmt1);
3239 tree x1 = TREE_OPERAND (cond1, 0);
3240 tree y1 = TREE_OPERAND (cond1, 1);
3242 tree comb;
3243 if (opcode == BIT_AND_EXPR)
3244 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3245 else if (opcode == BIT_IOR_EXPR)
3246 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3247 else
3248 gcc_unreachable ();
3249 if (comb == NULL)
3250 continue;
3252 /* Success! */
3253 if (dump_file && (dump_flags & TDF_DETAILS))
3255 fprintf (dump_file, "Transforming ");
3256 print_generic_expr (dump_file, cond0, 0);
3257 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3258 print_generic_expr (dump_file, cond1, 0);
3259 fprintf (dump_file, " into ");
3260 print_generic_expr (dump_file, comb, 0);
3261 fputc ('\n', dump_file);
3264 gimple_assign_set_rhs1 (stmt0, comb);
3265 if (invert)
3266 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3267 *gimple_assign_rhs3_ptr (stmt0));
3268 update_stmt (stmt0);
3270 elt1 = error_mark_node;
3271 any_changes = true;
3275 if (any_changes)
3277 operand_entry *oe;
3278 j = 0;
3279 FOR_EACH_VEC_ELT (*ops, i, oe)
3281 if (oe->op == error_mark_node)
3282 continue;
3283 else if (i != j)
3284 (*ops)[j] = oe;
3285 j++;
3287 ops->truncate (j);
3290 return any_changes;
3293 /* Return true if STMT is a cast like:
3294 <bb N>:
3296 _123 = (int) _234;
3298 <bb M>:
3299 # _345 = PHI <_123(N), 1(...), 1(...)>
3300 where _234 has bool type, _123 has a single use and
3301 bb N has a single successor M. This is commonly used in
3302 the last block of a range test.
3304 Also return true if STMT is a tcc_comparison like:
3305 <bb N>:
3307 _234 = a_2(D) == 2;
3309 <bb M>:
3310 # _345 = PHI <_234(N), 1(...), 1(...)>
3311 _346 = (int) _345;
3312 where _234 has bool type, a single use and
3313 bb N has a single successor M. This is commonly used in
3314 the last block of a range test. */
3316 static bool
3317 final_range_test_p (gimple *stmt)
3319 basic_block bb, rhs_bb, lhs_bb;
3320 edge e;
3321 tree lhs, rhs;
3322 use_operand_p use_p;
3323 gimple *use_stmt;
3325 if (!gimple_assign_cast_p (stmt)
3326 && (!is_gimple_assign (stmt)
3327 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3328 != tcc_comparison)))
3329 return false;
3330 bb = gimple_bb (stmt);
3331 if (!single_succ_p (bb))
3332 return false;
3333 e = single_succ_edge (bb);
3334 if (e->flags & EDGE_COMPLEX)
3335 return false;
3337 lhs = gimple_assign_lhs (stmt);
3338 rhs = gimple_assign_rhs1 (stmt);
3339 if (gimple_assign_cast_p (stmt)
3340 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3341 || TREE_CODE (rhs) != SSA_NAME
3342 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3343 return false;
3345 if (!gimple_assign_cast_p (stmt)
3346 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3347 return false;
3349 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3350 if (!single_imm_use (lhs, &use_p, &use_stmt))
3351 return false;
3353 if (gimple_code (use_stmt) != GIMPLE_PHI
3354 || gimple_bb (use_stmt) != e->dest)
3355 return false;
3357 /* And that the rhs is defined in the same loop. */
3358 if (gimple_assign_cast_p (stmt))
3360 if (TREE_CODE (rhs) != SSA_NAME
3361 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3362 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3363 return false;
3365 else
3367 if (TREE_CODE (lhs) != SSA_NAME
3368 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3369 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3370 return false;
3373 return true;
3376 /* Return true if BB is suitable basic block for inter-bb range test
3377 optimization. If BACKWARD is true, BB should be the only predecessor
3378 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3379 or compared with it to find a common basic block to which all conditions
3380 branch if true resp. false. If BACKWARD is false, TEST_BB should
3381 be the only predecessor of BB. */
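/* Rough picture for the BACKWARD case (illustration only): BB ends in a
   GIMPLE_COND with one edge going to TEST_BB and the other to *OTHER_BB,
   and TEST_BB in turn also has an edge into the same *OTHER_BB, so the
   chained conditions short-circuit into one common join block whose PHIs
   are checked below.  */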
3383 static bool
3384 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3385 bool backward)
3387 edge_iterator ei, ei2;
3388 edge e, e2;
3389 gimple *stmt;
3390 gphi_iterator gsi;
3391 bool other_edge_seen = false;
3392 bool is_cond;
3394 if (test_bb == bb)
3395 return false;
3396 /* Check last stmt first. */
3397 stmt = last_stmt (bb);
3398 if (stmt == NULL
3399 || (gimple_code (stmt) != GIMPLE_COND
3400 && (backward || !final_range_test_p (stmt)))
3401 || gimple_visited_p (stmt)
3402 || stmt_could_throw_p (stmt)
3403 || *other_bb == bb)
3404 return false;
3405 is_cond = gimple_code (stmt) == GIMPLE_COND;
3406 if (is_cond)
3408 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3409 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3410 to *OTHER_BB (if not set yet, try to find it out). */
3411 if (EDGE_COUNT (bb->succs) != 2)
3412 return false;
3413 FOR_EACH_EDGE (e, ei, bb->succs)
3415 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3416 return false;
3417 if (e->dest == test_bb)
3419 if (backward)
3420 continue;
3421 else
3422 return false;
3424 if (e->dest == bb)
3425 return false;
3426 if (*other_bb == NULL)
3428 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3429 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3430 return false;
3431 else if (e->dest == e2->dest)
3432 *other_bb = e->dest;
3433 if (*other_bb == NULL)
3434 return false;
3436 if (e->dest == *other_bb)
3437 other_edge_seen = true;
3438 else if (backward)
3439 return false;
3441 if (*other_bb == NULL || !other_edge_seen)
3442 return false;
3444 else if (single_succ (bb) != *other_bb)
3445 return false;
3447 /* Now check all PHIs of *OTHER_BB. */
3448 e = find_edge (bb, *other_bb);
3449 e2 = find_edge (test_bb, *other_bb);
3450 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3452 gphi *phi = gsi.phi ();
3453 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3454 corresponding to BB and TEST_BB predecessor must be the same. */
3455 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3456 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3458 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3459 one of the PHIs should have the lhs of the last stmt in
3460 that block as PHI arg and that PHI should have 0 or 1
3461 corresponding to it in all other range test basic blocks
3462 considered. */
3463 if (!is_cond)
3465 if (gimple_phi_arg_def (phi, e->dest_idx)
3466 == gimple_assign_lhs (stmt)
3467 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3468 || integer_onep (gimple_phi_arg_def (phi,
3469 e2->dest_idx))))
3470 continue;
3472 else
3474 gimple *test_last = last_stmt (test_bb);
3475 if (gimple_code (test_last) != GIMPLE_COND
3476 && gimple_phi_arg_def (phi, e2->dest_idx)
3477 == gimple_assign_lhs (test_last)
3478 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3479 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3480 continue;
3483 return false;
3486 return true;
3489 /* Return true if BB doesn't have side-effects that would disallow
3490 range test optimization, all SSA_NAMEs set in the bb are consumed
3491 in the bb and there are no PHIs. */
3493 static bool
3494 no_side_effect_bb (basic_block bb)
3496 gimple_stmt_iterator gsi;
3497 gimple *last;
3499 if (!gimple_seq_empty_p (phi_nodes (bb)))
3500 return false;
3501 last = last_stmt (bb);
3502 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3504 gimple *stmt = gsi_stmt (gsi);
3505 tree lhs;
3506 imm_use_iterator imm_iter;
3507 use_operand_p use_p;
3509 if (is_gimple_debug (stmt))
3510 continue;
3511 if (gimple_has_side_effects (stmt))
3512 return false;
3513 if (stmt == last)
3514 return true;
3515 if (!is_gimple_assign (stmt))
3516 return false;
3517 lhs = gimple_assign_lhs (stmt);
3518 if (TREE_CODE (lhs) != SSA_NAME)
3519 return false;
3520 if (gimple_assign_rhs_could_trap_p (stmt))
3521 return false;
3522 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3524 gimple *use_stmt = USE_STMT (use_p);
3525 if (is_gimple_debug (use_stmt))
3526 continue;
3527 if (gimple_bb (use_stmt) != bb)
3528 return false;
3531 return false;
3534 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3535 return true and fill in *OPS recursively. */
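/* For instance (hypothetical SSA names), given
     _5 = _1 | _2;
     _6 = _5 | _3;
   get_ops (_6, BIT_IOR_EXPR, ...) flattens the chain and pushes _1, _2
   and _3 into *OPS, assuming each of them has a single use and is not
   itself another reassociable | defined in LOOP.  */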
3537 static bool
3538 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3539 struct loop *loop)
3541 gimple *stmt = SSA_NAME_DEF_STMT (var);
3542 tree rhs[2];
3543 int i;
3545 if (!is_reassociable_op (stmt, code, loop))
3546 return false;
3548 rhs[0] = gimple_assign_rhs1 (stmt);
3549 rhs[1] = gimple_assign_rhs2 (stmt);
3550 gimple_set_visited (stmt, true);
3551 for (i = 0; i < 2; i++)
3552 if (TREE_CODE (rhs[i]) == SSA_NAME
3553 && !get_ops (rhs[i], code, ops, loop)
3554 && has_single_use (rhs[i]))
3556 operand_entry *oe = operand_entry_pool.allocate ();
3558 oe->op = rhs[i];
3559 oe->rank = code;
3560 oe->id = 0;
3561 oe->count = 1;
3562 oe->stmt_to_insert = NULL;
3563 ops->safe_push (oe);
3565 return true;
3568 /* Find the ops that were added by get_ops starting from VAR, see if
3569 they were changed during update_range_test and if yes, create new
3570 stmts. */
3572 static tree
3573 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3574 unsigned int *pidx, struct loop *loop)
3576 gimple *stmt = SSA_NAME_DEF_STMT (var);
3577 tree rhs[4];
3578 int i;
3580 if (!is_reassociable_op (stmt, code, loop))
3581 return NULL;
3583 rhs[0] = gimple_assign_rhs1 (stmt);
3584 rhs[1] = gimple_assign_rhs2 (stmt);
3585 rhs[2] = rhs[0];
3586 rhs[3] = rhs[1];
3587 for (i = 0; i < 2; i++)
3588 if (TREE_CODE (rhs[i]) == SSA_NAME)
3590 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3591 if (rhs[2 + i] == NULL_TREE)
3593 if (has_single_use (rhs[i]))
3594 rhs[2 + i] = ops[(*pidx)++]->op;
3595 else
3596 rhs[2 + i] = rhs[i];
3599 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3600 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3602 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3603 var = make_ssa_name (TREE_TYPE (var));
3604 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3605 rhs[2], rhs[3]);
3606 gimple_set_uid (g, gimple_uid (stmt));
3607 gimple_set_visited (g, true);
3608 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3610 return var;
3613 /* Structure to track the initial value passed to get_ops and
3614 the range in the ops vector for each basic block. */
3616 struct inter_bb_range_test_entry
3618 tree op;
3619 unsigned int first_idx, last_idx;
3622 /* Inter-bb range test optimization.
3624 Returns TRUE if a gimple conditional is optimized to a constant true/false,
3625 otherwise returns FALSE.
3627 This indicates to the caller that it should run a CFG cleanup pass
3628 once reassociation is completed. */
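/* Typical shape handled here (illustration only, hypothetical source):
     first_bb:  if (a_1 > 5) goto other_bb; else goto bb2;
     bb2:       if (a_1 == 7) goto other_bb; else goto bb3;
     bb3:       _2 = b_3 < 10;   (satisfies final_range_test_p)
   with the individual conditions feeding PHIs in other_bb; the tests are
   collected into OPS and handed to optimize_range_tests with ERROR_MARK
   as the opcode.  */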
3630 static bool
3631 maybe_optimize_range_tests (gimple *stmt)
3633 basic_block first_bb = gimple_bb (stmt);
3634 basic_block last_bb = first_bb;
3635 basic_block other_bb = NULL;
3636 basic_block bb;
3637 edge_iterator ei;
3638 edge e;
3639 auto_vec<operand_entry *> ops;
3640 auto_vec<inter_bb_range_test_entry> bbinfo;
3641 bool any_changes = false;
3642 bool cfg_cleanup_needed = false;
3644 /* Consider only basic blocks that end with GIMPLE_COND or
3645 a cast statement satisfying final_range_test_p. All
3646 but the last bb in the first_bb .. last_bb range
3647 should end with GIMPLE_COND. */
3648 if (gimple_code (stmt) == GIMPLE_COND)
3650 if (EDGE_COUNT (first_bb->succs) != 2)
3651 return cfg_cleanup_needed;
3653 else if (final_range_test_p (stmt))
3654 other_bb = single_succ (first_bb);
3655 else
3656 return cfg_cleanup_needed;
3658 if (stmt_could_throw_p (stmt))
3659 return cfg_cleanup_needed;
3661 /* As relative ordering of post-dominator sons isn't fixed,
3662 maybe_optimize_range_tests can be called first on any
3663 bb in the range we want to optimize. So, start searching
3664 backwards, if first_bb can be set to a predecessor. */
3665 while (single_pred_p (first_bb))
3667 basic_block pred_bb = single_pred (first_bb);
3668 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3669 break;
3670 if (!no_side_effect_bb (first_bb))
3671 break;
3672 first_bb = pred_bb;
3674 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3675 Before starting forward search in last_bb successors, find
3676 out the other_bb. */
3677 if (first_bb == last_bb)
3679 other_bb = NULL;
3680 /* As non-GIMPLE_COND last stmt always terminates the range,
3681 if forward search didn't discover anything, just give up. */
3682 if (gimple_code (stmt) != GIMPLE_COND)
3683 return cfg_cleanup_needed;
3684 /* Look at both successors. Either it ends with a GIMPLE_COND
3685 and satisfies suitable_cond_bb, or ends with a cast and
3686 other_bb is that cast's successor. */
3687 FOR_EACH_EDGE (e, ei, first_bb->succs)
3688 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3689 || e->dest == first_bb)
3690 return cfg_cleanup_needed;
3691 else if (single_pred_p (e->dest))
3693 stmt = last_stmt (e->dest);
3694 if (stmt
3695 && gimple_code (stmt) == GIMPLE_COND
3696 && EDGE_COUNT (e->dest->succs) == 2)
3698 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3699 break;
3700 else
3701 other_bb = NULL;
3703 else if (stmt
3704 && final_range_test_p (stmt)
3705 && find_edge (first_bb, single_succ (e->dest)))
3707 other_bb = single_succ (e->dest);
3708 if (other_bb == first_bb)
3709 other_bb = NULL;
3712 if (other_bb == NULL)
3713 return cfg_cleanup_needed;
3715 /* Now do the forward search, moving last_bb to successor bbs
3716 that aren't other_bb. */
3717 while (EDGE_COUNT (last_bb->succs) == 2)
3719 FOR_EACH_EDGE (e, ei, last_bb->succs)
3720 if (e->dest != other_bb)
3721 break;
3722 if (e == NULL)
3723 break;
3724 if (!single_pred_p (e->dest))
3725 break;
3726 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3727 break;
3728 if (!no_side_effect_bb (e->dest))
3729 break;
3730 last_bb = e->dest;
3732 if (first_bb == last_bb)
3733 return cfg_cleanup_needed;
3734 /* Here basic blocks first_bb through last_bb's predecessor
3735 end with GIMPLE_COND, all of them have one of the edges to
3736 other_bb and another to another block in the range,
3737 all blocks except first_bb don't have side-effects and
3738 last_bb ends with either GIMPLE_COND, or cast satisfying
3739 final_range_test_p. */
3740 for (bb = last_bb; ; bb = single_pred (bb))
3742 enum tree_code code;
3743 tree lhs, rhs;
3744 inter_bb_range_test_entry bb_ent;
3746 bb_ent.op = NULL_TREE;
3747 bb_ent.first_idx = ops.length ();
3748 bb_ent.last_idx = bb_ent.first_idx;
3749 e = find_edge (bb, other_bb);
3750 stmt = last_stmt (bb);
3751 gimple_set_visited (stmt, true);
3752 if (gimple_code (stmt) != GIMPLE_COND)
3754 use_operand_p use_p;
3755 gimple *phi;
3756 edge e2;
3757 unsigned int d;
3759 lhs = gimple_assign_lhs (stmt);
3760 rhs = gimple_assign_rhs1 (stmt);
3761 gcc_assert (bb == last_bb);
3763 /* stmt is
3764 _123 = (int) _234;
3766 _234 = a_2(D) == 2;
3768 followed by:
3769 <bb M>:
3770 # _345 = PHI <_123(N), 1(...), 1(...)>
3772 or 0 instead of 1. If it is 0, the _234
3773 range test is anded together with all the
3774 other range tests, if it is 1, it is ored with
3775 them. */
3776 single_imm_use (lhs, &use_p, &phi);
3777 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3778 e2 = find_edge (first_bb, other_bb);
3779 d = e2->dest_idx;
3780 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3781 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3782 code = BIT_AND_EXPR;
3783 else
3785 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3786 code = BIT_IOR_EXPR;
3789 /* If _234 SSA_NAME_DEF_STMT is
3790 _234 = _567 | _789;
3791 (or &, corresponding to 1/0 in the phi arguments),
3792 push into ops the individual range test arguments
3793 of the bitwise or resp. and, recursively. */
3794 if (TREE_CODE (rhs) == SSA_NAME
3795 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3796 != tcc_comparison)
3797 && !get_ops (rhs, code, &ops,
3798 loop_containing_stmt (stmt))
3799 && has_single_use (rhs))
3801 /* Otherwise, push the _234 range test itself. */
3802 operand_entry *oe = operand_entry_pool.allocate ();
3804 oe->op = rhs;
3805 oe->rank = code;
3806 oe->id = 0;
3807 oe->count = 1;
3808 oe->stmt_to_insert = NULL;
3809 ops.safe_push (oe);
3810 bb_ent.last_idx++;
3811 bb_ent.op = rhs;
3813 else if (is_gimple_assign (stmt)
3814 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3815 == tcc_comparison)
3816 && !get_ops (lhs, code, &ops,
3817 loop_containing_stmt (stmt))
3818 && has_single_use (lhs))
3820 operand_entry *oe = operand_entry_pool.allocate ();
3821 oe->op = lhs;
3822 oe->rank = code;
3823 oe->id = 0;
3824 oe->count = 1;
3825 ops.safe_push (oe);
3826 bb_ent.last_idx++;
3827 bb_ent.op = lhs;
3829 else
3831 bb_ent.last_idx = ops.length ();
3832 bb_ent.op = rhs;
3834 bbinfo.safe_push (bb_ent);
3835 continue;
3837 /* Otherwise stmt is GIMPLE_COND. */
3838 code = gimple_cond_code (stmt);
3839 lhs = gimple_cond_lhs (stmt);
3840 rhs = gimple_cond_rhs (stmt);
3841 if (TREE_CODE (lhs) == SSA_NAME
3842 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3843 && ((code != EQ_EXPR && code != NE_EXPR)
3844 || rhs != boolean_false_node
3845 /* Either push into ops the individual bitwise
3846 or resp. and operands, depending on which
3847 edge is other_bb. */
3848 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3849 ^ (code == EQ_EXPR))
3850 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3851 loop_containing_stmt (stmt))))
3853 /* Or push the GIMPLE_COND stmt itself. */
3854 operand_entry *oe = operand_entry_pool.allocate ();
3856 oe->op = NULL;
3857 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3858 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3859 /* oe->op = NULL signals that there is no SSA_NAME
3860 for the range test, and oe->id instead is the
3861 number of the basic block at whose end the
3862 GIMPLE_COND is. */
3863 oe->id = bb->index;
3864 oe->count = 1;
3865 oe->stmt_to_insert = NULL;
3866 ops.safe_push (oe);
3867 bb_ent.op = NULL;
3868 bb_ent.last_idx++;
3870 else if (ops.length () > bb_ent.first_idx)
3872 bb_ent.op = lhs;
3873 bb_ent.last_idx = ops.length ();
3875 bbinfo.safe_push (bb_ent);
3876 if (bb == first_bb)
3877 break;
3879 if (ops.length () > 1)
3880 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3881 if (any_changes)
3883 unsigned int idx, max_idx = 0;
3884 /* update_ops relies on the has_single_use predicates returning the
3885 same values as they did during get_ops earlier. Additionally it
3886 never removes statements, only adds new ones, and it should walk
3887 from the single imm use and check the predicate already before
3888 making those changes.
3889 On the other hand, handling GIMPLE_COND directly can turn
3890 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3891 it needs to be done in a separate loop afterwards. */
3892 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3894 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3895 && bbinfo[idx].op != NULL_TREE)
3897 tree new_op;
3899 max_idx = idx;
3900 stmt = last_stmt (bb);
3901 new_op = update_ops (bbinfo[idx].op,
3902 (enum tree_code)
3903 ops[bbinfo[idx].first_idx]->rank,
3904 ops, &bbinfo[idx].first_idx,
3905 loop_containing_stmt (stmt));
3906 if (new_op == NULL_TREE)
3908 gcc_assert (bb == last_bb);
3909 new_op = ops[bbinfo[idx].first_idx++]->op;
3911 if (bbinfo[idx].op != new_op)
3913 imm_use_iterator iter;
3914 use_operand_p use_p;
3915 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3917 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3918 if (is_gimple_debug (use_stmt))
3919 continue;
3920 else if (gimple_code (use_stmt) == GIMPLE_COND
3921 || gimple_code (use_stmt) == GIMPLE_PHI)
3922 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3923 SET_USE (use_p, new_op);
3924 else if ((is_gimple_assign (use_stmt)
3925 && (TREE_CODE_CLASS
3926 (gimple_assign_rhs_code (use_stmt))
3927 == tcc_comparison)))
3928 cast_or_tcc_cmp_stmt = use_stmt;
3929 else if (gimple_assign_cast_p (use_stmt))
3930 cast_or_tcc_cmp_stmt = use_stmt;
3931 else
3932 gcc_unreachable ();
3934 if (cast_or_tcc_cmp_stmt)
3936 gcc_assert (bb == last_bb);
3937 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3938 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3939 enum tree_code rhs_code
3940 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3941 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3942 : CONVERT_EXPR;
3943 gassign *g;
3944 if (is_gimple_min_invariant (new_op))
3946 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3947 g = gimple_build_assign (new_lhs, new_op);
3949 else
3950 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3951 gimple_stmt_iterator gsi
3952 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3953 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3954 gimple_set_visited (g, true);
3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3956 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3957 if (is_gimple_debug (use_stmt))
3958 continue;
3959 else if (gimple_code (use_stmt) == GIMPLE_COND
3960 || gimple_code (use_stmt) == GIMPLE_PHI)
3961 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3962 SET_USE (use_p, new_lhs);
3963 else
3964 gcc_unreachable ();
3968 if (bb == first_bb)
3969 break;
3971 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3973 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3974 && bbinfo[idx].op == NULL_TREE
3975 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3977 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3979 if (idx > max_idx)
3980 max_idx = idx;
3982 /* If we collapse the conditional to a true/false
3983 condition, then bubble that knowledge up to our caller. */
3984 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3986 gimple_cond_make_false (cond_stmt);
3987 cfg_cleanup_needed = true;
3989 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3991 gimple_cond_make_true (cond_stmt);
3992 cfg_cleanup_needed = true;
3994 else
3996 gimple_cond_set_code (cond_stmt, NE_EXPR);
3997 gimple_cond_set_lhs (cond_stmt,
3998 ops[bbinfo[idx].first_idx]->op);
3999 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4001 update_stmt (cond_stmt);
4003 if (bb == first_bb)
4004 break;
4007 /* The above changes could result in basic blocks after the first
4008 modified one, up to and including last_bb, to be executed even if
4009 they would not be in the original program. If the value ranges of
4010 assignment lhs' in those bbs were dependent on the conditions
4011 guarding those basic blocks which now can change, the VRs might
4012 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4013 are only used within the same bb, it should be not a big deal if
4014 we just reset all the VRs in those bbs. See PR68671. */
4015 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4016 reset_flow_sensitive_info_in_bb (bb);
4018 return cfg_cleanup_needed;
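/* A rough picture of the overall effect (an illustrative sketch, not a
   real dump): when several GIMPLE_CONDs in a single-predecessor chain
   of blocks all test the same SSA_NAME, their tests are collected into
   OPS and handed to optimize_range_tests; if some of them merge, the
   surviving test is rewritten via update_ops, while a now-redundant
   GIMPLE_COND is folded to a constant jump with
   gimple_cond_make_true/false above, and the caller is told that a CFG
   cleanup is needed.  */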
4021 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4022 of STMT in its operands. This is also known as a "destructive
4023 update" operation. */
4025 static bool
4026 is_phi_for_stmt (gimple *stmt, tree operand)
4028 gimple *def_stmt;
4029 gphi *def_phi;
4030 tree lhs;
4031 use_operand_p arg_p;
4032 ssa_op_iter i;
4034 if (TREE_CODE (operand) != SSA_NAME)
4035 return false;
4037 lhs = gimple_assign_lhs (stmt);
4039 def_stmt = SSA_NAME_DEF_STMT (operand);
4040 def_phi = dyn_cast <gphi *> (def_stmt);
4041 if (!def_phi)
4042 return false;
4044 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4045 if (lhs == USE_FROM_PTR (arg_p))
4046 return true;
4047 return false;
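/* Illustrative example (a sketch, not from an actual dump): for a loop
   accumulation such as

     # sum_1 = PHI <0.0(preheader), sum_2(latch)>
     sum_2 = x_3 + sum_1;

   is_phi_for_stmt called on the statement defining sum_2 with operand
   sum_1 returns true, because sum_1 is defined by a PHI that uses
   sum_2, the LHS of STMT.  */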
4050 /* Remove def stmt of VAR if VAR has zero uses and recurse
4051 on rhs1 operand if so. */
4053 static void
4054 remove_visited_stmt_chain (tree var)
4056 gimple *stmt;
4057 gimple_stmt_iterator gsi;
4059 while (1)
4061 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4062 return;
4063 stmt = SSA_NAME_DEF_STMT (var);
4064 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4066 var = gimple_assign_rhs1 (stmt);
4067 gsi = gsi_for_stmt (stmt);
4068 reassoc_remove_stmt (&gsi);
4069 release_defs (stmt);
4071 else
4072 return;
4076 /* This function checks three consecutive operands in
4077 the passed operand vector OPS starting from OPINDEX and
4078 swaps two operands if it is profitable for the binary operation
4079 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4081 We pair ops with the same rank if possible.
4083 The alternative we try is to see if STMT is a destructive
4084 update style statement, which is like:
4085 b = phi (a, ...)
4086 a = c + b;
4087 In that case, we want to use the destructive update form to
4088 expose the possible vectorizer sum reduction opportunity.
4089 In that case, the third operand will be the phi node. This
4090 check is not performed if STMT is null.
4092 We could, of course, try to be better as noted above, and do a
4093 lot of work to try to find these opportunities in >3 operand
4094 cases, but it is unlikely to be worth it. */
4096 static void
4097 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4098 unsigned int opindex, gimple *stmt)
4100 operand_entry *oe1, *oe2, *oe3;
4102 oe1 = ops[opindex];
4103 oe2 = ops[opindex + 1];
4104 oe3 = ops[opindex + 2];
4106 if ((oe1->rank == oe2->rank
4107 && oe2->rank != oe3->rank)
4108 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4109 && !is_phi_for_stmt (stmt, oe1->op)
4110 && !is_phi_for_stmt (stmt, oe2->op)))
4111 std::swap (*oe1, *oe3);
4112 else if ((oe1->rank == oe3->rank
4113 && oe2->rank != oe3->rank)
4114 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4115 && !is_phi_for_stmt (stmt, oe1->op)
4116 && !is_phi_for_stmt (stmt, oe3->op)))
4117 std::swap (*oe1, *oe2);
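/* A rough example of the rank-pairing case (illustrative names and
   ranks): with the three operands a_1 (rank 3), b_2 (rank 3) and
   c_3 (rank 5), the first condition above swaps a_1 with c_3, so that
   the equally ranked b_2 and a_1 become the pair consumed together by
   one binary statement, which gives the redundancy eliminator a better
   chance of matching that statement against an identical computation
   elsewhere.  */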
4120 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4121 two definitions, otherwise return STMT. */
4123 static inline gimple *
4124 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4126 if (TREE_CODE (rhs1) == SSA_NAME
4127 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4128 stmt = SSA_NAME_DEF_STMT (rhs1);
4129 if (TREE_CODE (rhs2) == SSA_NAME
4130 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4131 stmt = SSA_NAME_DEF_STMT (rhs2);
4132 return stmt;
4135 /* If the stmt that defines operand has to be inserted, insert it
4136 before the use. */
4137 static void
4138 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4140 gcc_assert (is_gimple_assign (stmt_to_insert));
4141 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4142 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4143 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4144 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4145 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4147 /* If the insert point is not stmt, then insert_point would be
4148 the point where operand rhs1 or rhs2 is defined. In this case,
4149 stmt_to_insert has to be inserted afterwards. This would
4150 only happen when the stmt insertion point is flexible. */
4151 if (stmt == insert_point)
4152 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4153 else
4154 insert_stmt_after (stmt_to_insert, insert_point);
4158 /* Recursively rewrite our linearized statements so that the operators
4159 match those in OPS[OPINDEX], putting the computation in rank
4160 order. Return new lhs. */
4162 static tree
4163 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4164 vec<operand_entry *> ops, bool changed)
4166 tree rhs1 = gimple_assign_rhs1 (stmt);
4167 tree rhs2 = gimple_assign_rhs2 (stmt);
4168 tree lhs = gimple_assign_lhs (stmt);
4169 operand_entry *oe;
4171 /* The final recursion case for this function is that you have
4172 exactly two operations left.
4173 If we had exactly one op in the entire list to start with, we
4174 would have never called this function, and the tail recursion
4175 rewrites them one at a time. */
4176 if (opindex + 2 == ops.length ())
4178 operand_entry *oe1, *oe2;
4180 oe1 = ops[opindex];
4181 oe2 = ops[opindex + 1];
4183 if (rhs1 != oe1->op || rhs2 != oe2->op)
4185 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4186 unsigned int uid = gimple_uid (stmt);
4188 if (dump_file && (dump_flags & TDF_DETAILS))
4190 fprintf (dump_file, "Transforming ");
4191 print_gimple_stmt (dump_file, stmt, 0, 0);
4194 /* If the stmt that defines operand has to be inserted, insert it
4195 before the use. */
4196 if (oe1->stmt_to_insert)
4197 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4198 if (oe2->stmt_to_insert)
4199 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4200 /* Even when changed is false, reassociation could have e.g. removed
4201 some redundant operations, so unless we are just swapping the
4202 arguments or unless there is no change at all (then we just
4203 return lhs), force creation of a new SSA_NAME. */
4204 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4206 gimple *insert_point
4207 = find_insert_point (stmt, oe1->op, oe2->op);
4208 lhs = make_ssa_name (TREE_TYPE (lhs));
4209 stmt
4210 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4211 oe1->op, oe2->op);
4212 gimple_set_uid (stmt, uid);
4213 gimple_set_visited (stmt, true);
4214 if (insert_point == gsi_stmt (gsi))
4215 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4216 else
4217 insert_stmt_after (stmt, insert_point);
4219 else
4221 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4222 == stmt);
4223 gimple_assign_set_rhs1 (stmt, oe1->op);
4224 gimple_assign_set_rhs2 (stmt, oe2->op);
4225 update_stmt (stmt);
4228 if (rhs1 != oe1->op && rhs1 != oe2->op)
4229 remove_visited_stmt_chain (rhs1);
4231 if (dump_file && (dump_flags & TDF_DETAILS))
4233 fprintf (dump_file, " into ");
4234 print_gimple_stmt (dump_file, stmt, 0, 0);
4237 return lhs;
4240 /* If we hit here, we should have 3 or more ops left. */
4241 gcc_assert (opindex + 2 < ops.length ());
4243 /* Rewrite the next operator. */
4244 oe = ops[opindex];
4246 /* If the stmt that defines operand has to be inserted, insert it
4247 before the use. */
4248 if (oe->stmt_to_insert)
4249 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4251 /* Recurse on the LHS of the binary operator, which is guaranteed to
4252 be the non-leaf side. */
4253 tree new_rhs1
4254 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4255 changed || oe->op != rhs2);
4257 if (oe->op != rhs2 || new_rhs1 != rhs1)
4259 if (dump_file && (dump_flags & TDF_DETAILS))
4261 fprintf (dump_file, "Transforming ");
4262 print_gimple_stmt (dump_file, stmt, 0, 0);
4265 /* If changed is false, this is either opindex == 0
4266 or all outer rhs2's were equal to corresponding oe->op,
4267 and powi_result is NULL.
4268 That means lhs is equivalent before and after reassociation.
4269 Otherwise ensure the old lhs SSA_NAME is not reused and
4270 create a new stmt as well, so that any debug stmts will be
4271 properly adjusted. */
4272 if (changed)
4274 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4275 unsigned int uid = gimple_uid (stmt);
4276 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4278 lhs = make_ssa_name (TREE_TYPE (lhs));
4279 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4280 new_rhs1, oe->op);
4281 gimple_set_uid (stmt, uid);
4282 gimple_set_visited (stmt, true);
4283 if (insert_point == gsi_stmt (gsi))
4284 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4285 else
4286 insert_stmt_after (stmt, insert_point);
4288 else
4290 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4291 == stmt);
4292 gimple_assign_set_rhs1 (stmt, new_rhs1);
4293 gimple_assign_set_rhs2 (stmt, oe->op);
4294 update_stmt (stmt);
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4299 fprintf (dump_file, " into ");
4300 print_gimple_stmt (dump_file, stmt, 0, 0);
4303 return lhs;
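/* A rough illustration of the rewrite (made-up names and ranks): if the
   linearized chain was

     t1_5 = a_2 + 1;   t2_6 = t1_5 + c_4;   x_7 = t2_6 + b_3;

   and the ops vector, sorted by descending rank, is { c_4, b_3, a_2, 1 },
   the recursion re-emits the chain roughly as

     t_8 = a_2 + 1;   t_9 = b_3 + t_8;   x_10 = c_4 + t_9;

   combining the lowest-ranked entries innermost and leaving the
   highest-ranked operand for the outermost statement, with fresh
   SSA names created whenever CHANGED forbids reusing the old ones.  */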
4306 /* Find out how many cycles we need to compute a chain of statements.
4307 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4308 maximum number of independent statements we may execute per cycle. */
4310 static int
4311 get_required_cycles (int ops_num, int cpu_width)
4313 int res;
4314 int elog;
4315 unsigned int rest;
4317 /* While we have more than 2 * cpu_width operands
4318 we may reduce the number of operands by cpu_width
4319 per cycle. */
4320 res = ops_num / (2 * cpu_width);
4322 /* The remaining operand count may be halved each cycle
4323 until we have only one operand. */
4324 rest = (unsigned)(ops_num - res * cpu_width);
4325 elog = exact_log2 (rest);
4326 if (elog >= 0)
4327 res += elog;
4328 else
4329 res += floor_log2 (rest) + 1;
4331 return res;
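/* Worked example (just arithmetic, no target assumptions): for
   ops_num == 8 and cpu_width == 2 we get res = 8 / 4 = 2 cycles for the
   first phase, leaving rest = 8 - 2 * 2 = 4 operands, and since
   exact_log2 (4) == 2 the total is 2 + 2 = 4 cycles.  */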
4334 /* Returns an optimal number of registers to use for computation of
4335 given statements. */
4337 static int
4338 get_reassociation_width (int ops_num, enum tree_code opc,
4339 machine_mode mode)
4341 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4342 int width;
4343 int width_min;
4344 int cycles_best;
4346 if (param_width > 0)
4347 width = param_width;
4348 else
4349 width = targetm.sched.reassociation_width (opc, mode);
4351 if (width == 1)
4352 return width;
4354 /* Get the minimal time required for sequence computation. */
4355 cycles_best = get_required_cycles (ops_num, width);
4357 /* Check if we may use less width and still compute sequence for
4358 the same time. It will allow us to reduce registers usage.
4359 get_required_cycles is monotonically increasing with lower width
4360 so we can perform a binary search for the minimal width that still
4361 results in the optimal cycle count. */
4362 width_min = 1;
4363 while (width > width_min)
4365 int width_mid = (width + width_min) / 2;
4367 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4368 width = width_mid;
4369 else if (width_min < width_mid)
4370 width_min = width_mid;
4371 else
4372 break;
4375 return width;
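/* For instance (illustrative numbers): with ops_num == 4 and a target
   that reports width 4, get_required_cycles (4, 4) == 2, but
   get_required_cycles (4, 2) == 2 as well, so the search above settles
   on width 2, which needs fewer registers for the same number of
   cycles.  */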
4378 /* Recursively rewrite our linearized statements so that the operators
4379 match those in OPS[OPINDEX], putting the computation in rank
4380 order and trying to allow operations to be executed in
4381 parallel. */
4383 static void
4384 rewrite_expr_tree_parallel (gassign *stmt, int width,
4385 vec<operand_entry *> ops)
4387 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4388 int op_num = ops.length ();
4389 int stmt_num = op_num - 1;
4390 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4391 int op_index = op_num - 1;
4392 int stmt_index = 0;
4393 int ready_stmts_end = 0;
4394 int i = 0;
4395 gimple *stmt1 = NULL, *stmt2 = NULL;
4396 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4398 /* We start expression rewriting from the top statements.
4399 So, in this loop we create a full list of statements
4400 we will work with. */
4401 stmts[stmt_num - 1] = stmt;
4402 for (i = stmt_num - 2; i >= 0; i--)
4403 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4405 for (i = 0; i < stmt_num; i++)
4407 tree op1, op2;
4409 /* Determine whether we should use results of
4410 already handled statements or not. */
4411 if (ready_stmts_end == 0
4412 && (i - stmt_index >= width || op_index < 1))
4413 ready_stmts_end = i;
4415 /* Now we choose operands for the next statement. A nonzero
4416 value in ready_stmts_end means that we should use the
4417 results of already generated statements as new operands. */
4418 if (ready_stmts_end > 0)
4420 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4421 if (ready_stmts_end > stmt_index)
4422 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4423 else if (op_index >= 0)
4425 operand_entry *oe = ops[op_index--];
4426 stmt2 = oe->stmt_to_insert;
4427 op2 = oe->op;
4429 else
4431 gcc_assert (stmt_index < i);
4432 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4435 if (stmt_index >= ready_stmts_end)
4436 ready_stmts_end = 0;
4438 else
4440 if (op_index > 1)
4441 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4442 operand_entry *oe2 = ops[op_index--];
4443 operand_entry *oe1 = ops[op_index--];
4444 op2 = oe2->op;
4445 stmt2 = oe2->stmt_to_insert;
4446 op1 = oe1->op;
4447 stmt1 = oe1->stmt_to_insert;
4450 /* If we are about to emit the last statement, put the
4451 operands into the original statement. This will also
4452 terminate the loop. */
4453 if (op_index < 0 && stmt_index == i)
4454 i = stmt_num - 1;
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4458 fprintf (dump_file, "Transforming ");
4459 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4462 /* If the stmt that defines operand has to be inserted, insert it
4463 before the use. */
4464 if (stmt1)
4465 insert_stmt_before_use (stmts[i], stmt1);
4466 if (stmt2)
4467 insert_stmt_before_use (stmts[i], stmt2);
4468 stmt1 = stmt2 = NULL;
4470 /* We keep original statement only for the last one. All
4471 others are recreated. */
4472 if (i == stmt_num - 1)
4474 gimple_assign_set_rhs1 (stmts[i], op1);
4475 gimple_assign_set_rhs2 (stmts[i], op2);
4476 update_stmt (stmts[i]);
4478 else
4480 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4482 if (dump_file && (dump_flags & TDF_DETAILS))
4484 fprintf (dump_file, " into ");
4485 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4489 remove_visited_stmt_chain (last_rhs1);
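/* A sketch of the effect (illustrative): with width == 2 and four
   operands { a, b, c, d } of a PLUS chain, the loop above emits two
   independent additions and then combines their results, roughly

     t1 = c + d;   t2 = a + b;   x = t1 + t2;

   instead of the fully serial ((a + b) + c) + d, so that the two inner
   adds can issue in the same cycle on a 2-wide target.  */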
4492 /* Transform STMT, which is really (A + B) + (C + D), into the left
4493 linear form, ((A+B)+C)+D.
4494 Recurse on D if necessary. */
4496 static void
4497 linearize_expr (gimple *stmt)
4499 gimple_stmt_iterator gsi;
4500 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4501 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4502 gimple *oldbinrhs = binrhs;
4503 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4504 gimple *newbinrhs = NULL;
4505 struct loop *loop = loop_containing_stmt (stmt);
4506 tree lhs = gimple_assign_lhs (stmt);
4508 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4509 && is_reassociable_op (binrhs, rhscode, loop));
4511 gsi = gsi_for_stmt (stmt);
4513 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4514 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4515 gimple_assign_rhs_code (binrhs),
4516 gimple_assign_lhs (binlhs),
4517 gimple_assign_rhs2 (binrhs));
4518 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4519 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4520 gimple_set_uid (binrhs, gimple_uid (stmt));
4522 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4523 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4525 if (dump_file && (dump_flags & TDF_DETAILS))
4527 fprintf (dump_file, "Linearized: ");
4528 print_gimple_stmt (dump_file, stmt, 0, 0);
4531 reassociate_stats.linearized++;
4532 update_stmt (stmt);
4534 gsi = gsi_for_stmt (oldbinrhs);
4535 reassoc_remove_stmt (&gsi);
4536 release_defs (oldbinrhs);
4538 gimple_set_visited (stmt, true);
4539 gimple_set_visited (binlhs, true);
4540 gimple_set_visited (binrhs, true);
4542 /* Tail recurse on the new rhs if it still needs reassociation. */
4543 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4544 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4545 want to change the algorithm while converting to tuples. */
4546 linearize_expr (stmt);
4549 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4550 it. Otherwise, return NULL. */
4552 static gimple *
4553 get_single_immediate_use (tree lhs)
4555 use_operand_p immuse;
4556 gimple *immusestmt;
4558 if (TREE_CODE (lhs) == SSA_NAME
4559 && single_imm_use (lhs, &immuse, &immusestmt)
4560 && is_gimple_assign (immusestmt))
4561 return immusestmt;
4563 return NULL;
4566 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4567 representing the negated value. Insertions of any necessary
4568 instructions go before GSI.
4569 This function is recursive in that, if you hand it "a_5" as the
4570 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4571 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4573 static tree
4574 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4576 gimple *negatedefstmt = NULL;
4577 tree resultofnegate;
4578 gimple_stmt_iterator gsi;
4579 unsigned int uid;
4581 /* If we are trying to negate a name, defined by an add, negate the
4582 add operands instead. */
4583 if (TREE_CODE (tonegate) == SSA_NAME)
4584 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4585 if (TREE_CODE (tonegate) == SSA_NAME
4586 && is_gimple_assign (negatedefstmt)
4587 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4588 && has_single_use (gimple_assign_lhs (negatedefstmt))
4589 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4591 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4592 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4593 tree lhs = gimple_assign_lhs (negatedefstmt);
4594 gimple *g;
4596 gsi = gsi_for_stmt (negatedefstmt);
4597 rhs1 = negate_value (rhs1, &gsi);
4599 gsi = gsi_for_stmt (negatedefstmt);
4600 rhs2 = negate_value (rhs2, &gsi);
4602 gsi = gsi_for_stmt (negatedefstmt);
4603 lhs = make_ssa_name (TREE_TYPE (lhs));
4604 gimple_set_visited (negatedefstmt, true);
4605 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4606 gimple_set_uid (g, gimple_uid (negatedefstmt));
4607 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4608 return lhs;
4611 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4612 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4613 NULL_TREE, true, GSI_SAME_STMT);
4614 gsi = *gsip;
4615 uid = gimple_uid (gsi_stmt (gsi));
4616 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4618 gimple *stmt = gsi_stmt (gsi);
4619 if (gimple_uid (stmt) != 0)
4620 break;
4621 gimple_set_uid (stmt, uid);
4623 return resultofnegate;
4626 /* Return true if we should break up the subtract in STMT into an add
4627 with negate. This is true when the subtract operands are really
4628 adds, or the subtract itself is used in an add expression. In
4629 either case, breaking up the subtract into an add with negate
4630 exposes the adds to reassociation. */
4632 static bool
4633 should_break_up_subtract (gimple *stmt)
4635 tree lhs = gimple_assign_lhs (stmt);
4636 tree binlhs = gimple_assign_rhs1 (stmt);
4637 tree binrhs = gimple_assign_rhs2 (stmt);
4638 gimple *immusestmt;
4639 struct loop *loop = loop_containing_stmt (stmt);
4641 if (TREE_CODE (binlhs) == SSA_NAME
4642 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4643 return true;
4645 if (TREE_CODE (binrhs) == SSA_NAME
4646 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4647 return true;
4649 if (TREE_CODE (lhs) == SSA_NAME
4650 && (immusestmt = get_single_immediate_use (lhs))
4651 && is_gimple_assign (immusestmt)
4652 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4653 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4654 return true;
4655 return false;
4658 /* Transform STMT from A - B into A + -B. */
4660 static void
4661 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4663 tree rhs1 = gimple_assign_rhs1 (stmt);
4664 tree rhs2 = gimple_assign_rhs2 (stmt);
4666 if (dump_file && (dump_flags & TDF_DETAILS))
4668 fprintf (dump_file, "Breaking up subtract ");
4669 print_gimple_stmt (dump_file, stmt, 0, 0);
4672 rhs2 = negate_value (rhs2, gsip);
4673 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4674 update_stmt (stmt);
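/* For example (a sketch with made-up names): "x_3 = a_1 - b_2" becomes

     t_4 = -b_2;
     x_3 = a_1 + t_4;

   where t_4 is produced by negate_value, which may instead push the
   negation into b_2's defining PLUS_EXPR when that is possible.  */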
4677 /* Determine whether STMT is a builtin call that raises an SSA name
4678 to an integer power and has only one use. If so, and this is early
4679 reassociation and unsafe math optimizations are permitted, place
4680 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4681 If any of these conditions does not hold, return FALSE. */
4683 static bool
4684 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4686 tree arg1;
4687 REAL_VALUE_TYPE c, cint;
4689 switch (gimple_call_combined_fn (stmt))
4691 CASE_CFN_POW:
4692 if (flag_errno_math)
4693 return false;
4695 *base = gimple_call_arg (stmt, 0);
4696 arg1 = gimple_call_arg (stmt, 1);
4698 if (TREE_CODE (arg1) != REAL_CST)
4699 return false;
4701 c = TREE_REAL_CST (arg1);
4703 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4704 return false;
4706 *exponent = real_to_integer (&c);
4707 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4708 if (!real_identical (&c, &cint))
4709 return false;
4711 break;
4713 CASE_CFN_POWI:
4714 *base = gimple_call_arg (stmt, 0);
4715 arg1 = gimple_call_arg (stmt, 1);
4717 if (!tree_fits_shwi_p (arg1))
4718 return false;
4720 *exponent = tree_to_shwi (arg1);
4721 break;
4723 default:
4724 return false;
4727 /* Expanding negative exponents is generally unproductive, so we don't
4728 complicate matters with those. Exponents of zero and one should
4729 have been handled by expression folding. */
4730 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4731 return false;
4733 return true;
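/* Examples of the acceptance test (illustrative): "pow (x_1, 4.0)" is
   accepted with *BASE = x_1 and *EXPONENT = 4, while "pow (x_1, 2.5)"
   is rejected because 2.5 is not exactly an integer, and
   "powi (x_1, -2)" is rejected because negative exponents are not
   expanded here.  */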
4736 /* Try to derive and add operand entry for OP to *OPS. Return false if
4737 unsuccessful. */
4739 static bool
4740 try_special_add_to_ops (vec<operand_entry *> *ops,
4741 enum tree_code code,
4742 tree op, gimple* def_stmt)
4744 tree base = NULL_TREE;
4745 HOST_WIDE_INT exponent = 0;
4747 if (TREE_CODE (op) != SSA_NAME
4748 || ! has_single_use (op))
4749 return false;
4751 if (code == MULT_EXPR
4752 && reassoc_insert_powi_p
4753 && flag_unsafe_math_optimizations
4754 && is_gimple_call (def_stmt)
4755 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4757 add_repeat_to_ops_vec (ops, base, exponent);
4758 gimple_set_visited (def_stmt, true);
4759 return true;
4761 else if (code == MULT_EXPR
4762 && is_gimple_assign (def_stmt)
4763 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4764 && !HONOR_SNANS (TREE_TYPE (op))
4765 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4766 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4768 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4769 tree cst = build_minus_one_cst (TREE_TYPE (op));
4770 add_to_ops_vec (ops, rhs1);
4771 add_to_ops_vec (ops, cst);
4772 gimple_set_visited (def_stmt, true);
4773 return true;
4776 return false;
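/* Illustrative cases: for a multiplication chain containing t_2 where
   "t_2 = __builtin_pow (x_1, 3.0)", the call is absorbed as a repeated
   factor (x_1 with count 3); for "t_2 = -y_1" the entry is replaced by
   the pair { y_1, -1 of the operand's type }, exposing the negation to
   the multiply optimizations.  */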
4779 /* Recursively linearize a binary expression that is the RHS of STMT.
4780 Place the operands of the expression tree in the vector named OPS. */
4782 static void
4783 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4784 bool is_associative, bool set_visited)
4786 tree binlhs = gimple_assign_rhs1 (stmt);
4787 tree binrhs = gimple_assign_rhs2 (stmt);
4788 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4789 bool binlhsisreassoc = false;
4790 bool binrhsisreassoc = false;
4791 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4792 struct loop *loop = loop_containing_stmt (stmt);
4794 if (set_visited)
4795 gimple_set_visited (stmt, true);
4797 if (TREE_CODE (binlhs) == SSA_NAME)
4799 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4800 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4801 && !stmt_could_throw_p (binlhsdef));
4804 if (TREE_CODE (binrhs) == SSA_NAME)
4806 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4807 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4808 && !stmt_could_throw_p (binrhsdef));
4811 /* If the LHS is not reassociable, but the RHS is, we need to swap
4812 them. If neither is reassociable, there is nothing we can do, so
4813 just put them in the ops vector. If the LHS is reassociable,
4814 linearize it. If both are reassociable, then linearize the RHS
4815 and the LHS. */
4817 if (!binlhsisreassoc)
4819 /* If this is not an associative operation, like division, give up. */
4820 if (!is_associative)
4822 add_to_ops_vec (ops, binrhs);
4823 return;
4826 if (!binrhsisreassoc)
4828 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4829 add_to_ops_vec (ops, binrhs);
4831 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4832 add_to_ops_vec (ops, binlhs);
4834 return;
4837 if (dump_file && (dump_flags & TDF_DETAILS))
4839 fprintf (dump_file, "swapping operands of ");
4840 print_gimple_stmt (dump_file, stmt, 0, 0);
4843 swap_ssa_operands (stmt,
4844 gimple_assign_rhs1_ptr (stmt),
4845 gimple_assign_rhs2_ptr (stmt));
4846 update_stmt (stmt);
4848 if (dump_file && (dump_flags & TDF_DETAILS))
4850 fprintf (dump_file, " is now ");
4851 print_gimple_stmt (dump_file, stmt, 0, 0);
4854 /* We want to make it so the lhs is always the reassociative op,
4855 so swap. */
4856 std::swap (binlhs, binrhs);
4858 else if (binrhsisreassoc)
4860 linearize_expr (stmt);
4861 binlhs = gimple_assign_rhs1 (stmt);
4862 binrhs = gimple_assign_rhs2 (stmt);
4865 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4866 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4867 rhscode, loop));
4868 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4869 is_associative, set_visited);
4871 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4872 add_to_ops_vec (ops, binrhs);
4875 /* Repropagate the negates back into subtracts, since no other pass
4876 currently does it. */
4878 static void
4879 repropagate_negates (void)
4881 unsigned int i = 0;
4882 tree negate;
4884 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4886 gimple *user = get_single_immediate_use (negate);
4888 if (!user || !is_gimple_assign (user))
4889 continue;
4891 /* The negate operand can be either operand of a PLUS_EXPR
4892 (it can be the LHS if the RHS is a constant for example).
4894 Force the negate operand to the RHS of the PLUS_EXPR, then
4895 transform the PLUS_EXPR into a MINUS_EXPR. */
4896 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4898 /* If the negated operand appears on the LHS of the
4899 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4900 to force the negated operand to the RHS of the PLUS_EXPR. */
4901 if (gimple_assign_rhs1 (user) == negate)
4903 swap_ssa_operands (user,
4904 gimple_assign_rhs1_ptr (user),
4905 gimple_assign_rhs2_ptr (user));
4908 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4909 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4910 if (gimple_assign_rhs2 (user) == negate)
4912 tree rhs1 = gimple_assign_rhs1 (user);
4913 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4914 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4915 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4916 update_stmt (user);
4919 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4921 if (gimple_assign_rhs1 (user) == negate)
4923 /* We have
4924 x = -a
4925 y = x - b
4926 which we transform into
4927 x = a + b
4928 y = -x .
4929 This pushes down the negate which we possibly can merge
4930 into some other operation, hence insert it into the
4931 plus_negates vector. */
4932 gimple *feed = SSA_NAME_DEF_STMT (negate);
4933 tree a = gimple_assign_rhs1 (feed);
4934 tree b = gimple_assign_rhs2 (user);
4935 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4936 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4937 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4938 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4939 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4940 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4941 user = gsi_stmt (gsi2);
4942 update_stmt (user);
4943 reassoc_remove_stmt (&gsi);
4944 release_defs (feed);
4945 plus_negates.safe_push (gimple_assign_lhs (user));
4947 else
4949 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4950 rid of one operation. */
4951 gimple *feed = SSA_NAME_DEF_STMT (negate);
4952 tree a = gimple_assign_rhs1 (feed);
4953 tree rhs1 = gimple_assign_rhs1 (user);
4954 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4955 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4956 update_stmt (gsi_stmt (gsi));
4962 /* Returns true if OP is of a type for which we can do reassociation.
4963 That is for integral or non-saturating fixed-point types, and for
4964 floating-point types when -fassociative-math is enabled. */
4966 static bool
4967 can_reassociate_p (tree op)
4969 tree type = TREE_TYPE (op);
4970 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4971 || NON_SAT_FIXED_POINT_TYPE_P (type)
4972 || (flag_associative_math && FLOAT_TYPE_P (type)))
4973 return true;
4974 return false;
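/* For instance: an "unsigned int" operand qualifies because its
   overflow wraps, a plain signed "int" does not unless -fwrapv is in
   effect, and a "double" operand only qualifies when
   -fassociative-math (part of -ffast-math) is enabled.  */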
4977 /* Break up subtract operations in block BB.
4979 We do this top down because we don't know whether the subtract is
4980 part of a possible chain of reassociation except at the top.
4982 IE given
4983 d = f + g
4984 c = a + e
4985 b = c - d
4986 q = b - r
4987 k = t - q
4989 we want to break up k = t - q, but we won't until we've transformed q
4990 = b - r, which won't be broken up until we transform b = c - d.
4992 En passant, clear the GIMPLE visited flag on every statement
4993 and set UIDs within each basic block. */
4995 static void
4996 break_up_subtract_bb (basic_block bb)
4998 gimple_stmt_iterator gsi;
4999 basic_block son;
5000 unsigned int uid = 1;
5002 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5004 gimple *stmt = gsi_stmt (gsi);
5005 gimple_set_visited (stmt, false);
5006 gimple_set_uid (stmt, uid++);
5008 if (!is_gimple_assign (stmt)
5009 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5010 continue;
5012 /* Look for simple gimple subtract operations. */
5013 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5015 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5016 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5017 continue;
5019 /* Check for a subtract used only in an addition. If this
5020 is the case, transform it into add of a negate for better
5021 reassociation. IE transform C = A-B into C = A + -B if C
5022 is only used in an addition. */
5023 if (should_break_up_subtract (stmt))
5024 break_up_subtract (stmt, &gsi);
5026 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5027 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5028 plus_negates.safe_push (gimple_assign_lhs (stmt));
5030 for (son = first_dom_son (CDI_DOMINATORS, bb);
5031 son;
5032 son = next_dom_son (CDI_DOMINATORS, son))
5033 break_up_subtract_bb (son);
5036 /* Used for repeated factor analysis. */
5037 struct repeat_factor
5039 /* An SSA name that occurs in a multiply chain. */
5040 tree factor;
5042 /* Cached rank of the factor. */
5043 unsigned rank;
5045 /* Number of occurrences of the factor in the chain. */
5046 HOST_WIDE_INT count;
5048 /* An SSA name representing the product of this factor and
5049 all factors appearing later in the repeated factor vector. */
5050 tree repr;
5054 static vec<repeat_factor> repeat_factor_vec;
5056 /* Used for sorting the repeat factor vector. Sort primarily by
5057 ascending occurrence count, secondarily by descending rank. */
5059 static int
5060 compare_repeat_factors (const void *x1, const void *x2)
5062 const repeat_factor *rf1 = (const repeat_factor *) x1;
5063 const repeat_factor *rf2 = (const repeat_factor *) x2;
5065 if (rf1->count != rf2->count)
5066 return rf1->count - rf2->count;
5068 return rf2->rank - rf1->rank;
5071 /* Look for repeated operands in OPS in the multiply tree rooted at
5072 STMT. Replace them with an optimal sequence of multiplies and powi
5073 builtin calls, and remove the used operands from OPS. Return an
5074 SSA name representing the value of the replacement sequence. */
5076 static tree
5077 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5079 unsigned i, j, vec_len;
5080 int ii;
5081 operand_entry *oe;
5082 repeat_factor *rf1, *rf2;
5083 repeat_factor rfnew;
5084 tree result = NULL_TREE;
5085 tree target_ssa, iter_result;
5086 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5087 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5088 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5089 gimple *mul_stmt, *pow_stmt;
5091 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5092 target. */
5093 if (!powi_fndecl)
5094 return NULL_TREE;
5096 /* Allocate the repeated factor vector. */
5097 repeat_factor_vec.create (10);
5099 /* Scan the OPS vector for all SSA names in the product and build
5100 up a vector of occurrence counts for each factor. */
5101 FOR_EACH_VEC_ELT (*ops, i, oe)
5103 if (TREE_CODE (oe->op) == SSA_NAME)
5105 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5107 if (rf1->factor == oe->op)
5109 rf1->count += oe->count;
5110 break;
5114 if (j >= repeat_factor_vec.length ())
5116 rfnew.factor = oe->op;
5117 rfnew.rank = oe->rank;
5118 rfnew.count = oe->count;
5119 rfnew.repr = NULL_TREE;
5120 repeat_factor_vec.safe_push (rfnew);
5125 /* Sort the repeated factor vector by (a) increasing occurrence count,
5126 and (b) decreasing rank. */
5127 repeat_factor_vec.qsort (compare_repeat_factors);
5129 /* It is generally best to combine as many base factors as possible
5130 into a product before applying __builtin_powi to the result.
5131 However, the sort order chosen for the repeated factor vector
5132 allows us to cache partial results for the product of the base
5133 factors for subsequent use. When we already have a cached partial
5134 result from a previous iteration, it is best to make use of it
5135 before looking for another __builtin_pow opportunity.
5137 As an example, consider x * x * y * y * y * z * z * z * z.
5138 We want to first compose the product x * y * z, raise it to the
5139 second power, then multiply this by y * z, and finally multiply
5140 by z. This can be done in 5 multiplies provided we cache y * z
5141 for use in both expressions:
5143 t1 = y * z
5144 t2 = t1 * x
5145 t3 = t2 * t2
5146 t4 = t1 * t3
5147 result = t4 * z
5149 If we instead ignored the cached y * z and first multiplied by
5150 the __builtin_pow opportunity z * z, we would get the inferior:
5152 t1 = y * z
5153 t2 = t1 * x
5154 t3 = t2 * t2
5155 t4 = z * z
5156 t5 = t3 * t4
5157 result = t5 * y */
5159 vec_len = repeat_factor_vec.length ();
5161 /* Repeatedly look for opportunities to create a builtin_powi call. */
5162 while (true)
5164 HOST_WIDE_INT power;
5166 /* First look for the largest cached product of factors from
5167 preceding iterations. If found, create a builtin_powi for
5168 it if the minimum occurrence count for its factors is at
5169 least 2, or just use this cached product as our next
5170 multiplicand if the minimum occurrence count is 1. */
5171 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5173 if (rf1->repr && rf1->count > 0)
5174 break;
5177 if (j < vec_len)
5179 power = rf1->count;
5181 if (power == 1)
5183 iter_result = rf1->repr;
5185 if (dump_file && (dump_flags & TDF_DETAILS))
5187 unsigned elt;
5188 repeat_factor *rf;
5189 fputs ("Multiplying by cached product ", dump_file);
5190 for (elt = j; elt < vec_len; elt++)
5192 rf = &repeat_factor_vec[elt];
5193 print_generic_expr (dump_file, rf->factor, 0);
5194 if (elt < vec_len - 1)
5195 fputs (" * ", dump_file);
5197 fputs ("\n", dump_file);
5200 else
5202 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5203 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5204 build_int_cst (integer_type_node,
5205 power));
5206 gimple_call_set_lhs (pow_stmt, iter_result);
5207 gimple_set_location (pow_stmt, gimple_location (stmt));
5208 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5209 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5213 unsigned elt;
5214 repeat_factor *rf;
5215 fputs ("Building __builtin_pow call for cached product (",
5216 dump_file);
5217 for (elt = j; elt < vec_len; elt++)
5219 rf = &repeat_factor_vec[elt];
5220 print_generic_expr (dump_file, rf->factor, 0);
5221 if (elt < vec_len - 1)
5222 fputs (" * ", dump_file);
5224 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5225 power);
5229 else
5231 /* Otherwise, find the first factor in the repeated factor
5232 vector whose occurrence count is at least 2. If no such
5233 factor exists, there are no builtin_powi opportunities
5234 remaining. */
5235 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5237 if (rf1->count >= 2)
5238 break;
5241 if (j >= vec_len)
5242 break;
5244 power = rf1->count;
5246 if (dump_file && (dump_flags & TDF_DETAILS))
5248 unsigned elt;
5249 repeat_factor *rf;
5250 fputs ("Building __builtin_pow call for (", dump_file);
5251 for (elt = j; elt < vec_len; elt++)
5253 rf = &repeat_factor_vec[elt];
5254 print_generic_expr (dump_file, rf->factor, 0);
5255 if (elt < vec_len - 1)
5256 fputs (" * ", dump_file);
5258 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5261 reassociate_stats.pows_created++;
5263 /* Visit each element of the vector in reverse order (so that
5264 high-occurrence elements are visited first, and within the
5265 same occurrence count, lower-ranked elements are visited
5266 first). Form a linear product of all elements in this order
5267 whose occurrence count is at least that of element J.
5268 Record the SSA name representing the product of each element
5269 with all subsequent elements in the vector. */
5270 if (j == vec_len - 1)
5271 rf1->repr = rf1->factor;
5272 else
5274 for (ii = vec_len - 2; ii >= (int)j; ii--)
5276 tree op1, op2;
5278 rf1 = &repeat_factor_vec[ii];
5279 rf2 = &repeat_factor_vec[ii + 1];
5281 /* Init the last factor's representative to be itself. */
5282 if (!rf2->repr)
5283 rf2->repr = rf2->factor;
5285 op1 = rf1->factor;
5286 op2 = rf2->repr;
5288 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5289 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5290 op1, op2);
5291 gimple_set_location (mul_stmt, gimple_location (stmt));
5292 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5293 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5294 rf1->repr = target_ssa;
5296 /* Don't reprocess the multiply we just introduced. */
5297 gimple_set_visited (mul_stmt, true);
5301 /* Form a call to __builtin_powi for the maximum product
5302 just formed, raised to the power obtained earlier. */
5303 rf1 = &repeat_factor_vec[j];
5304 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5305 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5306 build_int_cst (integer_type_node,
5307 power));
5308 gimple_call_set_lhs (pow_stmt, iter_result);
5309 gimple_set_location (pow_stmt, gimple_location (stmt));
5310 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5311 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5314 /* If we previously formed at least one other builtin_powi call,
5315 form the product of this one and those others. */
5316 if (result)
5318 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5319 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5320 result, iter_result);
5321 gimple_set_location (mul_stmt, gimple_location (stmt));
5322 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5323 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5324 gimple_set_visited (mul_stmt, true);
5325 result = new_result;
5327 else
5328 result = iter_result;
5330 /* Decrement the occurrence count of each element in the product
5331 by the count found above, and remove this many copies of each
5332 factor from OPS. */
5333 for (i = j; i < vec_len; i++)
5335 unsigned k = power;
5336 unsigned n;
5338 rf1 = &repeat_factor_vec[i];
5339 rf1->count -= power;
5341 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5343 if (oe->op == rf1->factor)
5345 if (oe->count <= k)
5347 ops->ordered_remove (n);
5348 k -= oe->count;
5350 if (k == 0)
5351 break;
5353 else
5355 oe->count -= k;
5356 break;
5363 /* At this point all elements in the repeated factor vector have a
5364 remaining occurrence count of 0 or 1, and those with a count of 1
5365 don't have cached representatives. Re-sort the ops vector and
5366 clean up. */
5367 ops->qsort (sort_by_operand_rank);
5368 repeat_factor_vec.release ();
5370 /* Return the final product computed herein. Note that there may
5371 still be some elements with single occurrence count left in OPS;
5372 those will be handled by the normal reassociation logic. */
5373 return result;
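/* A small end-to-end illustration (made-up SSA names): for the product
   x * x * x * y * y the repeated factor vector holds { y: 2, x: 3 }
   after sorting, so the code above emits roughly

     t1 = y * x;
     t2 = __builtin_powi (t1, 2);

   removes two copies of x and both copies of y from OPS, and on the
   next iteration multiplies t2 by the remaining x and returns that
   product; any leftover single-occurrence operands in OPS are handled
   by the normal reassociation rewrite.  */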
5376 /* Attempt to optimize
5377 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5378 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5380 static void
5381 attempt_builtin_copysign (vec<operand_entry *> *ops)
5383 operand_entry *oe;
5384 unsigned int i;
5385 unsigned int length = ops->length ();
5386 tree cst = ops->last ()->op;
5388 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5389 return;
5391 FOR_EACH_VEC_ELT (*ops, i, oe)
5393 if (TREE_CODE (oe->op) == SSA_NAME
5394 && has_single_use (oe->op))
5396 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5397 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5399 tree arg0, arg1;
5400 switch (gimple_call_combined_fn (old_call))
5402 CASE_CFN_COPYSIGN:
5403 arg0 = gimple_call_arg (old_call, 0);
5404 arg1 = gimple_call_arg (old_call, 1);
5405 /* The first argument of copysign must be a constant,
5406 otherwise there's nothing to do. */
5407 if (TREE_CODE (arg0) == REAL_CST)
5409 tree type = TREE_TYPE (arg0);
5410 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5411 /* If we couldn't fold to a single constant, skip it.
5412 That happens e.g. for inexact multiplication when
5413 -frounding-math. */
5414 if (mul == NULL_TREE)
5415 break;
5416 /* Instead of adjusting OLD_CALL, let's build a new
5417 call to not leak the LHS and prevent keeping bogus
5418 debug statements. DCE will clean up the old call. */
5419 gcall *new_call;
5420 if (gimple_call_internal_p (old_call))
5421 new_call = gimple_build_call_internal
5422 (IFN_COPYSIGN, 2, mul, arg1);
5423 else
5424 new_call = gimple_build_call
5425 (gimple_call_fndecl (old_call), 2, mul, arg1);
5426 tree lhs = make_ssa_name (type);
5427 gimple_call_set_lhs (new_call, lhs);
5428 gimple_set_location (new_call,
5429 gimple_location (old_call));
5430 insert_stmt_after (new_call, old_call);
5431 /* We've used the constant, get rid of it. */
5432 ops->pop ();
5433 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5434 /* Handle the CST1 < 0 case by negating the result. */
5435 if (cst1_neg)
5437 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5438 gimple *negate_stmt
5439 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5440 insert_stmt_after (negate_stmt, new_call);
5441 oe->op = negrhs;
5443 else
5444 oe->op = lhs;
5445 if (dump_file && (dump_flags & TDF_DETAILS))
5447 fprintf (dump_file, "Optimizing copysign: ");
5448 print_generic_expr (dump_file, cst, 0);
5449 fprintf (dump_file, " * COPYSIGN (");
5450 print_generic_expr (dump_file, arg0, 0);
5451 fprintf (dump_file, ", ");
5452 print_generic_expr (dump_file, arg1, 0);
5453 fprintf (dump_file, ") into %sCOPYSIGN (",
5454 cst1_neg ? "-" : "");
5455 print_generic_expr (dump_file, mul, 0);
5456 fprintf (dump_file, ", ");
5457 print_generic_expr (dump_file, arg1, 0);
5458 fprintf (dump_file, ")\n");
5460 return;
5462 break;
5463 default:
5464 break;
5471 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5473 static void
5474 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5476 tree rhs1;
5478 if (dump_file && (dump_flags & TDF_DETAILS))
5480 fprintf (dump_file, "Transforming ");
5481 print_gimple_stmt (dump_file, stmt, 0, 0);
5484 rhs1 = gimple_assign_rhs1 (stmt);
5485 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5486 update_stmt (stmt);
5487 remove_visited_stmt_chain (rhs1);
5489 if (dump_file && (dump_flags & TDF_DETAILS))
5491 fprintf (dump_file, " into ");
5492 print_gimple_stmt (dump_file, stmt, 0, 0);
5496 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5498 static void
5499 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5500 tree rhs1, tree rhs2)
5502 if (dump_file && (dump_flags & TDF_DETAILS))
5504 fprintf (dump_file, "Transforming ");
5505 print_gimple_stmt (dump_file, stmt, 0, 0);
5508 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5509 update_stmt (gsi_stmt (*gsi));
5510 remove_visited_stmt_chain (rhs1);
5512 if (dump_file && (dump_flags & TDF_DETAILS))
5514 fprintf (dump_file, " into ");
5515 print_gimple_stmt (dump_file, stmt, 0, 0);
5519 /* Reassociate expressions in basic block BB and its children in the
5520 post-dominator tree.
5522 Bubble up the return status from maybe_optimize_range_tests. */
5524 static bool
5525 reassociate_bb (basic_block bb)
5527 gimple_stmt_iterator gsi;
5528 basic_block son;
5529 gimple *stmt = last_stmt (bb);
5530 bool cfg_cleanup_needed = false;
5532 if (stmt && !gimple_visited_p (stmt))
5533 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5535 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5537 stmt = gsi_stmt (gsi);
5539 if (is_gimple_assign (stmt)
5540 && !stmt_could_throw_p (stmt))
5542 tree lhs, rhs1, rhs2;
5543 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5545 /* If this is not a gimple binary expression, there is
5546 nothing for us to do with it. */
5547 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5548 continue;
5550 /* If this was part of an already processed statement,
5551 we don't need to touch it again. */
5552 if (gimple_visited_p (stmt))
5554 /* This statement might have become dead because of previous
5555 reassociations. */
5556 if (has_zero_uses (gimple_get_lhs (stmt)))
5558 reassoc_remove_stmt (&gsi);
5559 release_defs (stmt);
5560 /* We might end up removing the last stmt above, which
5561 leaves the iterator at the end of the sequence.
5562 Reset it to the last stmt in that case; this might
5563 still be the end of the sequence if we removed the
5564 last statement of the sequence, in which case
5565 we need to bail out. */
5566 if (gsi_end_p (gsi))
5568 gsi = gsi_last_bb (bb);
5569 if (gsi_end_p (gsi))
5570 break;
5573 continue;
5576 lhs = gimple_assign_lhs (stmt);
5577 rhs1 = gimple_assign_rhs1 (stmt);
5578 rhs2 = gimple_assign_rhs2 (stmt);
5580 /* For non-bit or min/max operations we can't associate
5581 all types. Verify that here. */
5582 if (rhs_code != BIT_IOR_EXPR
5583 && rhs_code != BIT_AND_EXPR
5584 && rhs_code != BIT_XOR_EXPR
5585 && rhs_code != MIN_EXPR
5586 && rhs_code != MAX_EXPR
5587 && (!can_reassociate_p (lhs)
5588 || !can_reassociate_p (rhs1)
5589 || !can_reassociate_p (rhs2)))
5590 continue;
5592 if (associative_tree_code (rhs_code))
5594 auto_vec<operand_entry *> ops;
5595 tree powi_result = NULL_TREE;
5596 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5598 /* There may be no immediate uses left by the time we
5599 get here because we may have eliminated them all. */
5600 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5601 continue;
5603 gimple_set_visited (stmt, true);
5604 linearize_expr_tree (&ops, stmt, true, true);
5605 ops.qsort (sort_by_operand_rank);
5606 optimize_ops_list (rhs_code, &ops);
5607 if (undistribute_ops_list (rhs_code, &ops,
5608 loop_containing_stmt (stmt)))
5610 ops.qsort (sort_by_operand_rank);
5611 optimize_ops_list (rhs_code, &ops);
5614 if (rhs_code == PLUS_EXPR
5615 && transform_add_to_multiply (&ops))
5616 ops.qsort (sort_by_operand_rank);
5618 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5620 if (is_vector)
5621 optimize_vec_cond_expr (rhs_code, &ops);
5622 else
5623 optimize_range_tests (rhs_code, &ops);
5626 if (rhs_code == MULT_EXPR && !is_vector)
5628 attempt_builtin_copysign (&ops);
5630 if (reassoc_insert_powi_p
5631 && flag_unsafe_math_optimizations)
5632 powi_result = attempt_builtin_powi (stmt, &ops);
5635 operand_entry *last;
5636 bool negate_result = false;
5637 if (ops.length () > 1
5638 && rhs_code == MULT_EXPR)
5640 last = ops.last ();
5641 if ((integer_minus_onep (last->op)
5642 || real_minus_onep (last->op))
5643 && !HONOR_SNANS (TREE_TYPE (lhs))
5644 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5645 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5647 ops.pop ();
5648 negate_result = true;
5652 tree new_lhs = lhs;
5653 /* If the operand vector is now empty, all operands were
5654 consumed by the __builtin_powi optimization. */
5655 if (ops.length () == 0)
5656 transform_stmt_to_copy (&gsi, stmt, powi_result);
5657 else if (ops.length () == 1)
5659 tree last_op = ops.last ()->op;
5661 /* If the stmt that defines operand has to be inserted, insert it
5662 before the use. */
5663 if (ops.last ()->stmt_to_insert)
5664 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5665 if (powi_result)
5666 transform_stmt_to_multiply (&gsi, stmt, last_op,
5667 powi_result);
5668 else
5669 transform_stmt_to_copy (&gsi, stmt, last_op);
5671 else
5673 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5674 int ops_num = ops.length ();
5675 int width = get_reassociation_width (ops_num, rhs_code, mode);
5677 if (dump_file && (dump_flags & TDF_DETAILS))
5678 fprintf (dump_file,
5679 "Width = %d was chosen for reassociation\n", width);
5681 if (width > 1
5682 && ops.length () > 3)
5683 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5684 width, ops);
5685 else
5687 /* When there are three operands left, we want
5688 to make sure the ones that get the double
5689 binary op are chosen wisely. */
5690 int len = ops.length ();
5691 if (len >= 3)
5692 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5694 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5695 powi_result != NULL
5696 || negate_result);
5699 /* If we combined some repeated factors into a
5700 __builtin_powi call, multiply that result by the
5701 reassociated operands. */
5702 if (powi_result)
5704 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5705 tree type = TREE_TYPE (lhs);
5706 tree target_ssa = make_temp_ssa_name (type, NULL,
5707 "reassocpow");
5708 gimple_set_lhs (lhs_stmt, target_ssa);
5709 update_stmt (lhs_stmt);
5710 if (lhs != new_lhs)
5712 target_ssa = new_lhs;
5713 new_lhs = lhs;
5715 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5716 powi_result, target_ssa);
5717 gimple_set_location (mul_stmt, gimple_location (stmt));
5718 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5719 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5723 if (negate_result)
5725 stmt = SSA_NAME_DEF_STMT (lhs);
5726 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5727 gimple_set_lhs (stmt, tmp);
5728 if (lhs != new_lhs)
5729 tmp = new_lhs;
5730 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5731 tmp);
5732 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5733 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5734 update_stmt (stmt);
5739 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5740 son;
5741 son = next_dom_son (CDI_POST_DOMINATORS, son))
5742 cfg_cleanup_needed |= reassociate_bb (son);
5744 return cfg_cleanup_needed;
5747 /* Add jumps around shifts for range tests turned into bit tests.
5748 For each SSA_NAME VAR we have code like:
5749 VAR = ...; // final stmt of range comparison
5750 // bit test here...;
5751 OTHERVAR = ...; // final stmt of the bit test sequence
5752 RES = VAR | OTHERVAR;
5753 Turn the above into:
5754 VAR = ...;
5755 if (VAR != 0)
5756 goto <l3>;
5757 else
5758 goto <l2>;
5759 <l2>:
5760 // bit test here...;
5761 OTHERVAR = ...;
5762 <l3>:
5763 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5765 static void
5766 branch_fixup (void)
5768 tree var;
5769 unsigned int i;
5771 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5773 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5774 gimple *use_stmt;
5775 use_operand_p use;
5776 bool ok = single_imm_use (var, &use, &use_stmt);
5777 gcc_assert (ok
5778 && is_gimple_assign (use_stmt)
5779 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5780 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5782 basic_block cond_bb = gimple_bb (def_stmt);
5783 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5784 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5786 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5787 gimple *g = gimple_build_cond (NE_EXPR, var,
5788 build_zero_cst (TREE_TYPE (var)),
5789 NULL_TREE, NULL_TREE);
5790 location_t loc = gimple_location (use_stmt);
5791 gimple_set_location (g, loc);
5792 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5794 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5795 etrue->probability = REG_BR_PROB_BASE / 2;
5796 etrue->count = cond_bb->count / 2;
5797 edge efalse = find_edge (cond_bb, then_bb);
5798 efalse->flags = EDGE_FALSE_VALUE;
5799 efalse->probability -= etrue->probability;
5800 efalse->count -= etrue->count;
5801 then_bb->count -= etrue->count;
5803 tree othervar = NULL_TREE;
5804 if (gimple_assign_rhs1 (use_stmt) == var)
5805 othervar = gimple_assign_rhs2 (use_stmt);
5806 else if (gimple_assign_rhs2 (use_stmt) == var)
5807 othervar = gimple_assign_rhs1 (use_stmt);
5808 else
5809 gcc_unreachable ();
5810 tree lhs = gimple_assign_lhs (use_stmt);
5811 gphi *phi = create_phi_node (lhs, merge_bb);
5812 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5813 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5814 gsi = gsi_for_stmt (use_stmt);
5815 gsi_remove (&gsi, true);
5817 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5818 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5820 reassoc_branch_fixups.release ();
5823 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5824 void debug_ops_vector (vec<operand_entry *> ops);
5826 /* Dump the operand entry vector OPS to FILE. */
5828 void
5829 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5831 operand_entry *oe;
5832 unsigned int i;
5834 FOR_EACH_VEC_ELT (ops, i, oe)
5836 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5837 print_generic_expr (file, oe->op, 0);
5838 fprintf (file, "\n");
5842 /* Dump the operand entry vector OPS to STDERR. */
5844 DEBUG_FUNCTION void
5845 debug_ops_vector (vec<operand_entry *> ops)
5847 dump_ops_vector (stderr, ops);
5850 /* Bubble up return status from reassociate_bb. */
5852 static bool
5853 do_reassoc (void)
5855 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5856 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }
  /* Set up rank for each BB  */
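  /* The shift keeps block ranks widely separated, leaving the low bits
     for the small per-statement increments that get_rank adds on top of
     them.  */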
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
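  /* The pass is instantiated more than once in passes.def; the parameter
     given there selects whether an instance may synthesize __builtin_powi
     calls.  */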
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc
} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}