gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
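/* As a rough illustration of the five steps (made-up SSA names, each
   temporary assumed to have a single use):

     t1 = a + c;
     t2 = b - c;
     x  = t1 + t2;

   Step 1 turns the subtraction into t2 = b + -c, step 2 linearizes the
   addition chain into the operand list {a, c, b, -c}, step 3 cancels
   the c / -c pair, and step 4 rewrites the remaining operands in rank
   order, leaving effectively x = a + b.  */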
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block is
219 split right after that SSA_NAME's definition statement and before
220 its only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the rank recorded for expression E; return -1 if none. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e, 0);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied/whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id - oea->id;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if ((oeb->rank - oea->rank == 0)
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
518 versions of removed SSA_NAMEs, so if possible, prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
547 else
548 return oeb->id - oea->id;
551 if (oeb->rank != oea->rank)
552 return oeb->rank - oea->rank;
553 else
554 return oeb->id - oea->id;
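/* For instance, given PLUS_EXPR operands a_5 (rank 3), b_7 (rank 1)
   and the constants 7 and 2.0 (illustrative names only), the comparator
   above sorts the vector as { a_5, b_7, 7, 2.0 }: decreasing rank, the
   rank-0 constants at the end, grouped by constant type so that
   constants which are likely to fold sit next to each other.  */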
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is a reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
609 tree rhs1 = gimple_assign_rhs1 (stmt);
610 tree rhs2 = gimple_assign_rhs2 (stmt);
611 if (TREE_CODE (rhs1) == SSA_NAME
612 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
613 return false;
614 if (rhs2
615 && TREE_CODE (rhs2) == SSA_NAME
616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
617 return false;
618 return true;
621 return false;
625 /* Return true if STMT is a nop-conversion. */
627 static bool
628 gimple_nop_conversion_p (gimple *stmt)
630 if (gassign *ass = dyn_cast <gassign *> (stmt))
632 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
633 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
634 TREE_TYPE (gimple_assign_rhs1 (ass))))
635 return true;
637 return false;
640 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
641 operand of the unary operation. Otherwise, return NULL. */
643 static tree
644 get_unary_op (tree name, enum tree_code opcode)
646 gimple *stmt = SSA_NAME_DEF_STMT (name);
648 /* Look through nop conversions (sign changes). */
649 if (gimple_nop_conversion_p (stmt)
650 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
651 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
653 if (!is_gimple_assign (stmt))
654 return NULL_TREE;
656 if (gimple_assign_rhs_code (stmt) == opcode)
657 return gimple_assign_rhs1 (stmt);
658 return NULL_TREE;
661 /* Return true if OP1 and OP2 have the same value if casted to either type. */
663 static bool
664 ops_equal_values_p (tree op1, tree op2)
666 if (op1 == op2)
667 return true;
669 tree orig_op1 = op1;
670 if (TREE_CODE (op1) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op1);
673 if (gimple_nop_conversion_p (stmt))
675 op1 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2)
677 return true;
681 if (TREE_CODE (op2) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op2);
684 if (gimple_nop_conversion_p (stmt))
686 op2 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2
688 || orig_op1 == op2)
689 return true;
693 return false;
697 /* If CURR and LAST are a pair of ops that OPCODE allows us to
698 eliminate through equivalences, do so, remove them from OPS, and
699 return true. Otherwise, return false. */
701 static bool
702 eliminate_duplicate_pair (enum tree_code opcode,
703 vec<operand_entry *> *ops,
704 bool *all_done,
705 unsigned int i,
706 operand_entry *curr,
707 operand_entry *last)
710 /* If we have two of the same op, and the opcode is & |, min, or max,
711 we can eliminate one of them.
712 If we have two of the same op, and the opcode is ^, we can
713 eliminate both of them. */
715 if (last && last->op == curr->op)
717 switch (opcode)
719 case MAX_EXPR:
720 case MIN_EXPR:
721 case BIT_IOR_EXPR:
722 case BIT_AND_EXPR:
723 if (dump_file && (dump_flags & TDF_DETAILS))
725 fprintf (dump_file, "Equivalence: ");
726 print_generic_expr (dump_file, curr->op, 0);
727 fprintf (dump_file, " [&|minmax] ");
728 print_generic_expr (dump_file, last->op, 0);
729 fprintf (dump_file, " -> ");
730 print_generic_stmt (dump_file, last->op, 0);
733 ops->ordered_remove (i);
734 reassociate_stats.ops_eliminated ++;
736 return true;
738 case BIT_XOR_EXPR:
739 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "Equivalence: ");
742 print_generic_expr (dump_file, curr->op, 0);
743 fprintf (dump_file, " ^ ");
744 print_generic_expr (dump_file, last->op, 0);
745 fprintf (dump_file, " -> nothing\n");
748 reassociate_stats.ops_eliminated += 2;
750 if (ops->length () == 2)
752 ops->truncate (0);
753 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
754 *all_done = true;
756 else
758 ops->ordered_remove (i-1);
759 ops->ordered_remove (i-1);
762 return true;
764 default:
765 break;
768 return false;
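/* E.g. with BIT_AND_EXPR, two adjacent identical operands { ..., a, a }
   lose one copy (a & a == a); with BIT_XOR_EXPR both copies are removed
   (a ^ a == 0), and if the pair was all that was left, a zero constant
   of the appropriate type is pushed in their place.  */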
771 static vec<tree> plus_negates;
773 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
774 expression, look in OPS for a corresponding positive operation to cancel
775 it out. If we find one, remove the other from OPS, replace
776 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
777 return false. */
779 static bool
780 eliminate_plus_minus_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 unsigned int currindex,
783 operand_entry *curr)
785 tree negateop;
786 tree notop;
787 unsigned int i;
788 operand_entry *oe;
790 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
791 return false;
793 negateop = get_unary_op (curr->op, NEGATE_EXPR);
794 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
795 if (negateop == NULL_TREE && notop == NULL_TREE)
796 return false;
798 /* Any non-negated version will have a rank that is one less than
799 the current rank. So once we hit those ranks, if we don't find
800 one, we can stop. */
802 for (i = currindex + 1;
803 ops->iterate (i, &oe)
804 && oe->rank >= curr->rank - 1 ;
805 i++)
807 if (negateop
808 && ops_equal_values_p (oe->op, negateop))
810 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Equivalence: ");
813 print_generic_expr (dump_file, negateop, 0);
814 fprintf (dump_file, " + -");
815 print_generic_expr (dump_file, oe->op, 0);
816 fprintf (dump_file, " -> 0\n");
819 ops->ordered_remove (i);
820 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
821 ops->ordered_remove (currindex);
822 reassociate_stats.ops_eliminated ++;
824 return true;
826 else if (notop
827 && ops_equal_values_p (oe->op, notop))
829 tree op_type = TREE_TYPE (oe->op);
831 if (dump_file && (dump_flags & TDF_DETAILS))
833 fprintf (dump_file, "Equivalence: ");
834 print_generic_expr (dump_file, notop, 0);
835 fprintf (dump_file, " + ~");
836 print_generic_expr (dump_file, oe->op, 0);
837 fprintf (dump_file, " -> -1\n");
840 ops->ordered_remove (i);
841 add_to_ops_vec (ops, build_all_ones_cst (op_type));
842 ops->ordered_remove (currindex);
843 reassociate_stats.ops_eliminated ++;
845 return true;
849 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
850 save it for later inspection in repropagate_negates(). */
851 if (negateop != NULL_TREE
852 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
853 plus_negates.safe_push (curr->op);
855 return false;
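/* Illustration: in the PLUS_EXPR operand list { -a, b, a } the negate
   cancels against a and the pair is dropped in favor of a 0 constant;
   in { ~a, b, a } the pair is dropped in favor of all ones (-1), since
   a + ~a is all ones in two's complement.  */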
858 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
859 bitwise not expression, look in OPS for a corresponding operand to
860 cancel it out. If we find one, remove the other from OPS, replace
861 OPS[CURRINDEX] with 0, and return true. Otherwise, return
862 false. */
864 static bool
865 eliminate_not_pairs (enum tree_code opcode,
866 vec<operand_entry *> *ops,
867 unsigned int currindex,
868 operand_entry *curr)
870 tree notop;
871 unsigned int i;
872 operand_entry *oe;
874 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
875 || TREE_CODE (curr->op) != SSA_NAME)
876 return false;
878 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
879 if (notop == NULL_TREE)
880 return false;
882 /* Any non-not version will have a rank that is one less than
883 the current rank. So once we hit those ranks, if we don't find
884 one, we can stop. */
886 for (i = currindex + 1;
887 ops->iterate (i, &oe)
888 && oe->rank >= curr->rank - 1;
889 i++)
891 if (oe->op == notop)
893 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Equivalence: ");
896 print_generic_expr (dump_file, notop, 0);
897 if (opcode == BIT_AND_EXPR)
898 fprintf (dump_file, " & ~");
899 else if (opcode == BIT_IOR_EXPR)
900 fprintf (dump_file, " | ~");
901 print_generic_expr (dump_file, oe->op, 0);
902 if (opcode == BIT_AND_EXPR)
903 fprintf (dump_file, " -> 0\n");
904 else if (opcode == BIT_IOR_EXPR)
905 fprintf (dump_file, " -> -1\n");
908 if (opcode == BIT_AND_EXPR)
909 oe->op = build_zero_cst (TREE_TYPE (oe->op));
910 else if (opcode == BIT_IOR_EXPR)
911 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
913 reassociate_stats.ops_eliminated += ops->length () - 1;
914 ops->truncate (0);
915 ops->quick_push (oe);
916 return true;
920 return false;
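/* For example, a & b & ~a collapses to 0 and a | b | ~a collapses to
   all ones: the matching operand is overwritten with the constant and
   every other operand is discarded, since the constant dominates the
   whole chain.  */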
923 /* Use constant value that may be present in OPS to try to eliminate
924 operands. Note that this function is only really used when we've
925 eliminated ops for other reasons, or merged constants. Across
926 single statements, fold already does all of this, plus more. There
927 is little point in duplicating logic, so I've only included the
928 identities that I could ever construct testcases to trigger. */
930 static void
931 eliminate_using_constants (enum tree_code opcode,
932 vec<operand_entry *> *ops)
934 operand_entry *oelast = ops->last ();
935 tree type = TREE_TYPE (oelast->op);
937 if (oelast->rank == 0
938 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
940 switch (opcode)
942 case BIT_AND_EXPR:
943 if (integer_zerop (oelast->op))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found & 0, removing all other ops\n");
950 reassociate_stats.ops_eliminated += ops->length () - 1;
952 ops->truncate (0);
953 ops->quick_push (oelast);
954 return;
957 else if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found & -1, removing\n");
963 ops->pop ();
964 reassociate_stats.ops_eliminated++;
967 break;
968 case BIT_IOR_EXPR:
969 if (integer_all_onesp (oelast->op))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found | -1, removing all other ops\n");
976 reassociate_stats.ops_eliminated += ops->length () - 1;
978 ops->truncate (0);
979 ops->quick_push (oelast);
980 return;
983 else if (integer_zerop (oelast->op))
985 if (ops->length () != 1)
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found | 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
993 break;
994 case MULT_EXPR:
995 if (integer_zerop (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_NANS (type)
998 && !HONOR_SIGNED_ZEROS (type)
999 && real_zerop (oelast->op)))
1001 if (ops->length () != 1)
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "Found * 0, removing all other ops\n");
1006 reassociate_stats.ops_eliminated += ops->length () - 1;
1007 ops->truncate (1);
1008 ops->quick_push (oelast);
1009 return;
1012 else if (integer_onep (oelast->op)
1013 || (FLOAT_TYPE_P (type)
1014 && !HONOR_SNANS (type)
1015 && real_onep (oelast->op)))
1017 if (ops->length () != 1)
1019 if (dump_file && (dump_flags & TDF_DETAILS))
1020 fprintf (dump_file, "Found * 1, removing\n");
1021 ops->pop ();
1022 reassociate_stats.ops_eliminated++;
1023 return;
1026 break;
1027 case BIT_XOR_EXPR:
1028 case PLUS_EXPR:
1029 case MINUS_EXPR:
1030 if (integer_zerop (oelast->op)
1031 || (FLOAT_TYPE_P (type)
1032 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1033 && fold_real_zero_addition_p (type, oelast->op,
1034 opcode == MINUS_EXPR)))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found [|^+] 0, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1042 return;
1045 break;
1046 default:
1047 break;
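/* Spelled out, the identities handled above include: a & 0 and a * 0
   discard all other operands, a | -1 keeps only the -1, and a & -1,
   a | 0, a * 1, a + 0 and a ^ 0 simply drop the constant.  */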
1053 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1054 bool, bool);
1056 /* Structure for tracking and counting operands. */
1057 struct oecount {
1058 int cnt;
1059 int id;
1060 enum tree_code oecode;
1061 tree op;
1065 /* The heap for the oecount hashtable and the sorted list of operands. */
1066 static vec<oecount> cvec;
1069 /* Oecount hashtable helpers. */
1071 struct oecount_hasher : int_hash <int, 0, 1>
1073 static inline hashval_t hash (int);
1074 static inline bool equal (int, int);
1077 /* Hash function for oecount. */
1079 inline hashval_t
1080 oecount_hasher::hash (int p)
1082 const oecount *c = &cvec[p - 42];
1083 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1086 /* Comparison function for oecount. */
1088 inline bool
1089 oecount_hasher::equal (int p1, int p2)
1091 const oecount *c1 = &cvec[p1 - 42];
1092 const oecount *c2 = &cvec[p2 - 42];
1093 return (c1->oecode == c2->oecode
1094 && c1->op == c2->op);
1097 /* Comparison function for qsort sorting oecount elements by count. */
1099 static int
1100 oecount_cmp (const void *p1, const void *p2)
1102 const oecount *c1 = (const oecount *)p1;
1103 const oecount *c2 = (const oecount *)p2;
1104 if (c1->cnt != c2->cnt)
1105 return c1->cnt - c2->cnt;
1106 else
1107 /* If counts are identical, use unique IDs to stabilize qsort. */
1108 return c1->id - c2->id;
1111 /* Return TRUE iff STMT represents a builtin call that raises OP
1112 to some exponent. */
1114 static bool
1115 stmt_is_power_of_op (gimple *stmt, tree op)
1117 if (!is_gimple_call (stmt))
1118 return false;
1120 switch (gimple_call_combined_fn (stmt))
1122 CASE_CFN_POW:
1123 CASE_CFN_POWI:
1124 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1126 default:
1127 return false;
1131 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1132 in place and return the result. Assumes that stmt_is_power_of_op
1133 was previously called for STMT and returned TRUE. */
1135 static HOST_WIDE_INT
1136 decrement_power (gimple *stmt)
1138 REAL_VALUE_TYPE c, cint;
1139 HOST_WIDE_INT power;
1140 tree arg1;
1142 switch (gimple_call_combined_fn (stmt))
1144 CASE_CFN_POW:
1145 arg1 = gimple_call_arg (stmt, 1);
1146 c = TREE_REAL_CST (arg1);
1147 power = real_to_integer (&c) - 1;
1148 real_from_integer (&cint, VOIDmode, power, SIGNED);
1149 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1150 return power;
1152 CASE_CFN_POWI:
1153 arg1 = gimple_call_arg (stmt, 1);
1154 power = TREE_INT_CST_LOW (arg1) - 1;
1155 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1156 return power;
1158 default:
1159 gcc_unreachable ();
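/* For instance, for __builtin_powi (x, 5) the exponent argument is
   rewritten in place to 4 and 4 is returned; for __builtin_pow (x, 3.0)
   it becomes 2.0 and 2 is returned.  */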
1163 /* Replace the SSA name defined by STMT with a new SSA name, replace
1164 all uses of the old name, and return the new SSA name. */
1166 static tree
1167 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1169 gimple *use_stmt;
1170 use_operand_p use;
1171 imm_use_iterator iter;
1172 tree new_lhs, new_debug_lhs = NULL_TREE;
1173 tree lhs = gimple_get_lhs (stmt);
1175 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1176 gimple_set_lhs (stmt, new_lhs);
1178 /* Also need to update GIMPLE_DEBUGs. */
1179 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1181 tree repl = new_lhs;
1182 if (is_gimple_debug (use_stmt))
1184 if (new_debug_lhs == NULL_TREE)
1186 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1187 gdebug *def_temp
1188 = gimple_build_debug_bind (new_debug_lhs,
1189 build2 (opcode, TREE_TYPE (lhs),
1190 new_lhs, op),
1191 stmt);
1192 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1193 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1194 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1195 gimple_set_uid (def_temp, gimple_uid (stmt));
1196 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1197 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1199 repl = new_debug_lhs;
1201 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1202 SET_USE (use, repl);
1203 update_stmt (use_stmt);
1205 return new_lhs;
1208 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1209 uses with new SSAs. Also do this for the stmt that defines DEF
1210 if *DEF is not OP. */
1212 static void
1213 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1214 vec<gimple *> &stmts_to_fix)
1216 unsigned i;
1217 gimple *stmt;
1219 if (*def != op
1220 && TREE_CODE (*def) == SSA_NAME
1221 && (stmt = SSA_NAME_DEF_STMT (*def))
1222 && gimple_code (stmt) != GIMPLE_NOP)
1223 *def = make_new_ssa_for_def (stmt, opcode, op);
1225 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1226 make_new_ssa_for_def (stmt, opcode, op);
1229 /* Find the single immediate use of STMT's LHS, and replace it
1230 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1231 replace *DEF with OP as well. */
1233 static void
1234 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1236 tree lhs;
1237 gimple *use_stmt;
1238 use_operand_p use;
1239 gimple_stmt_iterator gsi;
1241 if (is_gimple_call (stmt))
1242 lhs = gimple_call_lhs (stmt);
1243 else
1244 lhs = gimple_assign_lhs (stmt);
1246 gcc_assert (has_single_use (lhs));
1247 single_imm_use (lhs, &use, &use_stmt);
1248 if (lhs == *def)
1249 *def = op;
1250 SET_USE (use, op);
1251 if (TREE_CODE (op) != SSA_NAME)
1252 update_stmt (use_stmt);
1253 gsi = gsi_for_stmt (stmt);
1254 unlink_stmt_vdef (stmt);
1255 reassoc_remove_stmt (&gsi);
1256 release_defs (stmt);
1259 /* Walks the linear chain with result *DEF searching for an operation
1260 with operand OP and code OPCODE removing that from the chain. *DEF
1261 is updated if there is only one operand but no operation left. */
1263 static void
1264 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1266 tree orig_def = *def;
1267 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1268 /* PR72835 - Record the stmt chain that has to be updated such that
1269 we don't use the same LHS when the values computed are different. */
1270 auto_vec<gimple *, 64> stmts_to_fix;
1274 tree name;
1276 if (opcode == MULT_EXPR)
1278 if (stmt_is_power_of_op (stmt, op))
1280 if (decrement_power (stmt) == 1)
1282 if (stmts_to_fix.length () > 0)
1283 stmts_to_fix.pop ();
1284 propagate_op_to_single_use (op, stmt, def);
1286 break;
1288 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1290 if (gimple_assign_rhs1 (stmt) == op)
1292 tree cst = build_minus_one_cst (TREE_TYPE (op));
1293 if (stmts_to_fix.length () > 0)
1294 stmts_to_fix.pop ();
1295 propagate_op_to_single_use (cst, stmt, def);
1296 break;
1298 else if (integer_minus_onep (op)
1299 || real_minus_onep (op))
1301 gimple_assign_set_rhs_code
1302 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1303 break;
1308 name = gimple_assign_rhs1 (stmt);
1310 /* If this is the operation we look for and one of the operands
1311 is ours, simply propagate the other operand into the stmt's
1312 single use. */
1313 if (gimple_assign_rhs_code (stmt) == opcode
1314 && (name == op
1315 || gimple_assign_rhs2 (stmt) == op))
1317 if (name == op)
1318 name = gimple_assign_rhs2 (stmt);
1319 if (stmts_to_fix.length () > 0)
1320 stmts_to_fix.pop ();
1321 propagate_op_to_single_use (name, stmt, def);
1322 break;
1325 /* We might have a multiply of two __builtin_pow* calls, and
1326 the operand might be hiding in the rightmost one. Likewise
1327 this can happen for a negate. */
1328 if (opcode == MULT_EXPR
1329 && gimple_assign_rhs_code (stmt) == opcode
1330 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1331 && has_single_use (gimple_assign_rhs2 (stmt)))
1333 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1334 if (stmt_is_power_of_op (stmt2, op))
1336 if (decrement_power (stmt2) == 1)
1337 propagate_op_to_single_use (op, stmt2, def);
1338 else
1339 stmts_to_fix.safe_push (stmt2);
1340 break;
1342 else if (is_gimple_assign (stmt2)
1343 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1345 if (gimple_assign_rhs1 (stmt2) == op)
1347 tree cst = build_minus_one_cst (TREE_TYPE (op));
1348 propagate_op_to_single_use (cst, stmt2, def);
1349 break;
1351 else if (integer_minus_onep (op)
1352 || real_minus_onep (op))
1354 stmts_to_fix.safe_push (stmt2);
1355 gimple_assign_set_rhs_code
1356 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1357 break;
1362 /* Continue walking the chain. */
1363 gcc_assert (name != op
1364 && TREE_CODE (name) == SSA_NAME);
1365 stmt = SSA_NAME_DEF_STMT (name);
1366 stmts_to_fix.safe_push (stmt);
1368 while (1);
1370 if (stmts_to_fix.length () > 0 || *def == orig_def)
1371 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
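/* A rough example of the walk above (hypothetical SSA names): for the
   chain

     x_3 = a_1 * b_2;
     x_4 = x_3 * c_5;

   zero_one_operation (&def, MULT_EXPR, b_2) with *def == x_4 strips the
   b_2 factor by propagating a_1 into x_4, so the chain now computes
   a_1 * c_5, and (PR72835) the statements touched get fresh SSA names
   so that the old LHS is not reused for a different value.  */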
1374 /* Returns true if statement S1 dominates statement S2. Like
1375 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1377 static bool
1378 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1380 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1382 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1383 SSA_NAME. Assume it lives at the beginning of function and
1384 thus dominates everything. */
1385 if (!bb1 || s1 == s2)
1386 return true;
1388 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1389 if (!bb2)
1390 return false;
1392 if (bb1 == bb2)
1394 /* PHIs in the same basic block are assumed to be
1395 executed all in parallel, if only one stmt is a PHI,
1396 it dominates the other stmt in the same basic block. */
1397 if (gimple_code (s1) == GIMPLE_PHI)
1398 return true;
1400 if (gimple_code (s2) == GIMPLE_PHI)
1401 return false;
1403 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1405 if (gimple_uid (s1) < gimple_uid (s2))
1406 return true;
1408 if (gimple_uid (s1) > gimple_uid (s2))
1409 return false;
1411 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1412 unsigned int uid = gimple_uid (s1);
1413 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1415 gimple *s = gsi_stmt (gsi);
1416 if (gimple_uid (s) != uid)
1417 break;
1418 if (s == s2)
1419 return true;
1422 return false;
1425 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1428 /* Insert STMT after INSERT_POINT. */
1430 static void
1431 insert_stmt_after (gimple *stmt, gimple *insert_point)
1433 gimple_stmt_iterator gsi;
1434 basic_block bb;
1436 if (gimple_code (insert_point) == GIMPLE_PHI)
1437 bb = gimple_bb (insert_point);
1438 else if (!stmt_ends_bb_p (insert_point))
1440 gsi = gsi_for_stmt (insert_point);
1441 gimple_set_uid (stmt, gimple_uid (insert_point));
1442 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1443 return;
1445 else
1446 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1447 thus if it must end a basic block, it should be a call that can
1448 throw, or some assignment that can throw. If it throws, the LHS
1449 of it will not be initialized though, so only valid places using
1450 the SSA_NAME should be dominated by the fallthru edge. */
1451 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1452 gsi = gsi_after_labels (bb);
1453 if (gsi_end_p (gsi))
1455 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1456 gimple_set_uid (stmt,
1457 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1459 else
1460 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1461 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1464 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1465 the result. Places the statement after the definition of either
1466 OP1 or OP2. Returns the new statement. */
1468 static gimple *
1469 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1471 gimple *op1def = NULL, *op2def = NULL;
1472 gimple_stmt_iterator gsi;
1473 tree op;
1474 gassign *sum;
1476 /* Create the addition statement. */
1477 op = make_ssa_name (type);
1478 sum = gimple_build_assign (op, opcode, op1, op2);
1480 /* Find an insertion place and insert. */
1481 if (TREE_CODE (op1) == SSA_NAME)
1482 op1def = SSA_NAME_DEF_STMT (op1);
1483 if (TREE_CODE (op2) == SSA_NAME)
1484 op2def = SSA_NAME_DEF_STMT (op2);
1485 if ((!op1def || gimple_nop_p (op1def))
1486 && (!op2def || gimple_nop_p (op2def)))
1488 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1489 if (gsi_end_p (gsi))
1491 gimple_stmt_iterator gsi2
1492 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1493 gimple_set_uid (sum,
1494 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1496 else
1497 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1498 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1500 else
1502 gimple *insert_point;
1503 if ((!op1def || gimple_nop_p (op1def))
1504 || (op2def && !gimple_nop_p (op2def)
1505 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1506 insert_point = op2def;
1507 else
1508 insert_point = op1def;
1509 insert_stmt_after (sum, insert_point);
1511 update_stmt (sum);
1513 return sum;
1516 /* Perform un-distribution of divisions and multiplications.
1517 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1518 to (A + B) / X for real X.
1520 The algorithm is organized as follows.
1522 - First we walk the addition chain *OPS looking for summands that
1523 are defined by a multiplication or a real division. This results
1524 in the candidates bitmap with relevant indices into *OPS.
1526 - Second we build the chains of multiplications or divisions for
1527 these candidates, counting the number of occurrences of (operand, code)
1528 pairs in all of the candidates chains.
1530 - Third we sort the (operand, code) pairs by number of occurrence and
1531 process them starting with the pair with the most uses.
1533 * For each such pair we walk the candidates again to build a
1534 second candidate bitmap noting all multiplication/division chains
1535 that have at least one occurrence of (operand, code).
1537 * We build an alternate addition chain only covering these
1538 candidates with one (operand, code) operation removed from their
1539 multiplication/division chain.
1541 * The first candidate gets replaced by the alternate addition chain
1542 multiplied/divided by the operand.
1544 * All candidate chains get disabled for further processing and
1545 processing of (operand, code) pairs continues.
1547 The alternate addition chains built are re-processed by the main
1548 reassociation algorithm which allows optimizing a * x * y + b * y * x
1549 to (a + b ) * x * y in one invocation of the reassociation pass. */
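/* A small sketch of the resulting GIMPLE (made-up names):

     t1 = a * x;                t3 = a + b;
     t2 = b * x;      ==>       t4 = t3 * x;
     s  = t1 + t2;              s  = t4 + 0;

   where the alternate addition chain (t3, t4) is then re-processed by
   the main reassociation algorithm and the stray 0 operand is removed
   when the operand list is optimized again.  */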
1551 static bool
1552 undistribute_ops_list (enum tree_code opcode,
1553 vec<operand_entry *> *ops, struct loop *loop)
1555 unsigned int length = ops->length ();
1556 operand_entry *oe1;
1557 unsigned i, j;
1558 unsigned nr_candidates, nr_candidates2;
1559 sbitmap_iterator sbi0;
1560 vec<operand_entry *> *subops;
1561 bool changed = false;
1562 int next_oecount_id = 0;
1564 if (length <= 1
1565 || opcode != PLUS_EXPR)
1566 return false;
1568 /* Build a list of candidates to process. */
1569 auto_sbitmap candidates (length);
1570 bitmap_clear (candidates);
1571 nr_candidates = 0;
1572 FOR_EACH_VEC_ELT (*ops, i, oe1)
1574 enum tree_code dcode;
1575 gimple *oe1def;
1577 if (TREE_CODE (oe1->op) != SSA_NAME)
1578 continue;
1579 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1580 if (!is_gimple_assign (oe1def))
1581 continue;
1582 dcode = gimple_assign_rhs_code (oe1def);
1583 if ((dcode != MULT_EXPR
1584 && dcode != RDIV_EXPR)
1585 || !is_reassociable_op (oe1def, dcode, loop))
1586 continue;
1588 bitmap_set_bit (candidates, i);
1589 nr_candidates++;
1592 if (nr_candidates < 2)
1593 return false;
1595 if (dump_file && (dump_flags & TDF_DETAILS))
1597 fprintf (dump_file, "searching for un-distribute opportunities ");
1598 print_generic_expr (dump_file,
1599 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1600 fprintf (dump_file, " %d\n", nr_candidates);
1603 /* Build linearized sub-operand lists and the counting table. */
1604 cvec.create (0);
1606 hash_table<oecount_hasher> ctable (15);
1608 /* ??? Macro arguments cannot have multi-argument template types in
1609 them. This typedef is needed to work around that limitation. */
1610 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1611 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1612 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1614 gimple *oedef;
1615 enum tree_code oecode;
1616 unsigned j;
1618 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1619 oecode = gimple_assign_rhs_code (oedef);
1620 linearize_expr_tree (&subops[i], oedef,
1621 associative_tree_code (oecode), false);
1623 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1625 oecount c;
1626 int *slot;
1627 int idx;
1628 c.oecode = oecode;
1629 c.cnt = 1;
1630 c.id = next_oecount_id++;
1631 c.op = oe1->op;
1632 cvec.safe_push (c);
1633 idx = cvec.length () + 41;
1634 slot = ctable.find_slot (idx, INSERT);
1635 if (!*slot)
1637 *slot = idx;
1639 else
1641 cvec.pop ();
1642 cvec[*slot - 42].cnt++;
1647 /* Sort the counting table. */
1648 cvec.qsort (oecount_cmp);
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1652 oecount *c;
1653 fprintf (dump_file, "Candidates:\n");
1654 FOR_EACH_VEC_ELT (cvec, j, c)
1656 fprintf (dump_file, " %u %s: ", c->cnt,
1657 c->oecode == MULT_EXPR
1658 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1659 print_generic_expr (dump_file, c->op, 0);
1660 fprintf (dump_file, "\n");
1664 /* Process the (operand, code) pairs in order of most occurrence. */
1665 auto_sbitmap candidates2 (length);
1666 while (!cvec.is_empty ())
1668 oecount *c = &cvec.last ();
1669 if (c->cnt < 2)
1670 break;
1672 /* Now collect the operands in the outer chain that contain
1673 the common operand in their inner chain. */
1674 bitmap_clear (candidates2);
1675 nr_candidates2 = 0;
1676 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1678 gimple *oedef;
1679 enum tree_code oecode;
1680 unsigned j;
1681 tree op = (*ops)[i]->op;
1683 /* If we undistributed in this chain already this may be
1684 a constant. */
1685 if (TREE_CODE (op) != SSA_NAME)
1686 continue;
1688 oedef = SSA_NAME_DEF_STMT (op);
1689 oecode = gimple_assign_rhs_code (oedef);
1690 if (oecode != c->oecode)
1691 continue;
1693 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1695 if (oe1->op == c->op)
1697 bitmap_set_bit (candidates2, i);
1698 ++nr_candidates2;
1699 break;
1704 if (nr_candidates2 >= 2)
1706 operand_entry *oe1, *oe2;
1707 gimple *prod;
1708 int first = bitmap_first_set_bit (candidates2);
1710 /* Build the new addition chain. */
1711 oe1 = (*ops)[first];
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1714 fprintf (dump_file, "Building (");
1715 print_generic_expr (dump_file, oe1->op, 0);
1717 zero_one_operation (&oe1->op, c->oecode, c->op);
1718 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1720 gimple *sum;
1721 oe2 = (*ops)[i];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, " + ");
1725 print_generic_expr (dump_file, oe2->op, 0);
1727 zero_one_operation (&oe2->op, c->oecode, c->op);
1728 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1729 oe1->op, oe2->op, opcode);
1730 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1731 oe2->rank = 0;
1732 oe1->op = gimple_get_lhs (sum);
1735 /* Apply the multiplication/division. */
1736 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1737 oe1->op, c->op, c->oecode);
1738 if (dump_file && (dump_flags & TDF_DETAILS))
1740 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1741 print_generic_expr (dump_file, c->op, 0);
1742 fprintf (dump_file, "\n");
1745 /* Record it in the addition chain and disable further
1746 undistribution with this op. */
1747 oe1->op = gimple_assign_lhs (prod);
1748 oe1->rank = get_rank (oe1->op);
1749 subops[first].release ();
1751 changed = true;
1754 cvec.pop ();
1757 for (i = 0; i < ops->length (); ++i)
1758 subops[i].release ();
1759 free (subops);
1760 cvec.release ();
1762 return changed;
1765 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1766 expression, examine the other OPS to see if any of them are comparisons
1767 of the same values, which we may be able to combine or eliminate.
1768 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1770 static bool
1771 eliminate_redundant_comparison (enum tree_code opcode,
1772 vec<operand_entry *> *ops,
1773 unsigned int currindex,
1774 operand_entry *curr)
1776 tree op1, op2;
1777 enum tree_code lcode, rcode;
1778 gimple *def1, *def2;
1779 int i;
1780 operand_entry *oe;
1782 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1783 return false;
1785 /* Check that CURR is a comparison. */
1786 if (TREE_CODE (curr->op) != SSA_NAME)
1787 return false;
1788 def1 = SSA_NAME_DEF_STMT (curr->op);
1789 if (!is_gimple_assign (def1))
1790 return false;
1791 lcode = gimple_assign_rhs_code (def1);
1792 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1793 return false;
1794 op1 = gimple_assign_rhs1 (def1);
1795 op2 = gimple_assign_rhs2 (def1);
1797 /* Now look for a similar comparison in the remaining OPS. */
1798 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1800 tree t;
1802 if (TREE_CODE (oe->op) != SSA_NAME)
1803 continue;
1804 def2 = SSA_NAME_DEF_STMT (oe->op);
1805 if (!is_gimple_assign (def2))
1806 continue;
1807 rcode = gimple_assign_rhs_code (def2);
1808 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1809 continue;
1811 /* If we got here, we have a match. See if we can combine the
1812 two comparisons. */
1813 if (opcode == BIT_IOR_EXPR)
1814 t = maybe_fold_or_comparisons (lcode, op1, op2,
1815 rcode, gimple_assign_rhs1 (def2),
1816 gimple_assign_rhs2 (def2));
1817 else
1818 t = maybe_fold_and_comparisons (lcode, op1, op2,
1819 rcode, gimple_assign_rhs1 (def2),
1820 gimple_assign_rhs2 (def2));
1821 if (!t)
1822 continue;
1824 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1825 always give us a boolean_type_node value back. If the original
1826 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1827 we need to convert. */
1828 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1829 t = fold_convert (TREE_TYPE (curr->op), t);
1831 if (TREE_CODE (t) != INTEGER_CST
1832 && !operand_equal_p (t, curr->op, 0))
1834 enum tree_code subcode;
1835 tree newop1, newop2;
1836 if (!COMPARISON_CLASS_P (t))
1837 continue;
1838 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1839 STRIP_USELESS_TYPE_CONVERSION (newop1);
1840 STRIP_USELESS_TYPE_CONVERSION (newop2);
1841 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1842 continue;
1845 if (dump_file && (dump_flags & TDF_DETAILS))
1847 fprintf (dump_file, "Equivalence: ");
1848 print_generic_expr (dump_file, curr->op, 0);
1849 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1850 print_generic_expr (dump_file, oe->op, 0);
1851 fprintf (dump_file, " -> ");
1852 print_generic_expr (dump_file, t, 0);
1853 fprintf (dump_file, "\n");
1856 /* Now we can delete oe, as it has been subsumed by the new combined
1857 expression t. */
1858 ops->ordered_remove (i);
1859 reassociate_stats.ops_eliminated ++;
1861 /* If t is the same as curr->op, we're done. Otherwise we must
1862 replace curr->op with t. Special case is if we got a constant
1863 back, in which case we add it to the end instead of in place of
1864 the current entry. */
1865 if (TREE_CODE (t) == INTEGER_CST)
1867 ops->ordered_remove (currindex);
1868 add_to_ops_vec (ops, t);
1870 else if (!operand_equal_p (t, curr->op, 0))
1872 gimple *sum;
1873 enum tree_code subcode;
1874 tree newop1;
1875 tree newop2;
1876 gcc_assert (COMPARISON_CLASS_P (t));
1877 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1878 STRIP_USELESS_TYPE_CONVERSION (newop1);
1879 STRIP_USELESS_TYPE_CONVERSION (newop2);
1880 gcc_checking_assert (is_gimple_val (newop1)
1881 && is_gimple_val (newop2));
1882 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1883 curr->op = gimple_get_lhs (sum);
1885 return true;
1888 return false;
1892 /* Transform repeated addition of the same value into a multiplication
1893 by a constant. */
1894 static bool
1895 transform_add_to_multiply (vec<operand_entry *> *ops)
1897 operand_entry *oe;
1898 tree op = NULL_TREE;
1899 int j;
1900 int i, start = -1, end = 0, count = 0;
1901 auto_vec<std::pair <int, int> > indxs;
1902 bool changed = false;
1904 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1905 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1906 || !flag_unsafe_math_optimizations))
1907 return false;
1909 /* Look for repeated operands. */
1910 FOR_EACH_VEC_ELT (*ops, i, oe)
1912 if (start == -1)
1914 count = 1;
1915 op = oe->op;
1916 start = i;
1918 else if (operand_equal_p (oe->op, op, 0))
1920 count++;
1921 end = i;
1923 else
1925 if (count > 1)
1926 indxs.safe_push (std::make_pair (start, end));
1927 count = 1;
1928 op = oe->op;
1929 start = i;
1933 if (count > 1)
1934 indxs.safe_push (std::make_pair (start, end));
1936 for (j = indxs.length () - 1; j >= 0; --j)
1938 /* Convert repeated operand addition to multiplication. */
1939 start = indxs[j].first;
1940 end = indxs[j].second;
1941 op = (*ops)[start]->op;
1942 count = end - start + 1;
1943 for (i = end; i >= start; --i)
1944 ops->unordered_remove (i);
1945 tree tmp = make_ssa_name (TREE_TYPE (op));
1946 tree cst = build_int_cst (integer_type_node, count);
1947 gassign *mul_stmt
1948 = gimple_build_assign (tmp, MULT_EXPR,
1949 op, fold_convert (TREE_TYPE (op), cst));
1950 gimple_set_visited (mul_stmt, true);
1951 add_to_ops_vec (ops, tmp, mul_stmt);
1952 changed = true;
1955 return changed;
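/* E.g. the operand list for x + x + x + y becomes { y, tmp } with a new
   statement tmp = x * 3 recorded as stmt_to_insert, to be emitted when
   the expression is rewritten.  For floating-point types this is only
   done under -funsafe-math-optimizations.  */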
1959 /* Perform various identities and other optimizations on the list of
1960 operand entries, stored in OPS. The tree code for the binary
1961 operation between all the operands is OPCODE. */
1963 static void
1964 optimize_ops_list (enum tree_code opcode,
1965 vec<operand_entry *> *ops)
1967 unsigned int length = ops->length ();
1968 unsigned int i;
1969 operand_entry *oe;
1970 operand_entry *oelast = NULL;
1971 bool iterate = false;
1973 if (length == 1)
1974 return;
1976 oelast = ops->last ();
1978 /* If the last two are constants, pop the constants off, merge them
1979 and try the next two. */
1980 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1982 operand_entry *oelm1 = (*ops)[length - 2];
1984 if (oelm1->rank == 0
1985 && is_gimple_min_invariant (oelm1->op)
1986 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1987 TREE_TYPE (oelast->op)))
1989 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1990 oelm1->op, oelast->op);
1992 if (folded && is_gimple_min_invariant (folded))
1994 if (dump_file && (dump_flags & TDF_DETAILS))
1995 fprintf (dump_file, "Merging constants\n");
1997 ops->pop ();
1998 ops->pop ();
2000 add_to_ops_vec (ops, folded);
2001 reassociate_stats.constants_eliminated++;
2003 optimize_ops_list (opcode, ops);
2004 return;
2009 eliminate_using_constants (opcode, ops);
2010 oelast = NULL;
2012 for (i = 0; ops->iterate (i, &oe);)
2014 bool done = false;
2016 if (eliminate_not_pairs (opcode, ops, i, oe))
2017 return;
2018 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2019 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2020 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2022 if (done)
2023 return;
2024 iterate = true;
2025 oelast = NULL;
2026 continue;
2028 oelast = oe;
2029 i++;
2032 length = ops->length ();
2033 oelast = ops->last ();
2035 if (iterate)
2036 optimize_ops_list (opcode, ops);
2039 /* The following functions are subroutines to optimize_range_tests and allow
2040 it to try to change a logical combination of comparisons into a range
2041 test.
2043 For example, both
2044 X == 2 || X == 5 || X == 3 || X == 4
2045 and
2046 X >= 2 && X <= 5
2047 are converted to
2048 (unsigned) (X - 2) <= 3
2050 For more information see comments above fold_range_test in fold-const.c,
2051 this implementation is for GIMPLE. */
2053 struct range_entry
2055 tree exp;
2056 tree low;
2057 tree high;
2058 bool in_p;
2059 bool strict_overflow_p;
2060 unsigned int idx, next;
2063 /* This is similar to make_range in fold-const.c, but on top of
2064 GIMPLE instead of trees. If EXP is non-NULL, it should be
2065 an SSA_NAME and STMT argument is ignored, otherwise STMT
2066 argument should be a GIMPLE_COND. */
2068 static void
2069 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2071 int in_p;
2072 tree low, high;
2073 bool is_bool, strict_overflow_p;
2075 r->exp = NULL_TREE;
2076 r->in_p = false;
2077 r->strict_overflow_p = false;
2078 r->low = NULL_TREE;
2079 r->high = NULL_TREE;
2080 if (exp != NULL_TREE
2081 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2082 return;
2084 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2085 and see if we can refine the range. Some of the cases below may not
2086 happen, but it doesn't seem worth worrying about this. We "continue"
2087 the outer loop when we've changed something; otherwise we "break"
2088 the switch, which will "break" the while. */
2089 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2090 high = low;
2091 in_p = 0;
2092 strict_overflow_p = false;
2093 is_bool = false;
2094 if (exp == NULL_TREE)
2095 is_bool = true;
2096 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2098 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2099 is_bool = true;
2100 else
2101 return;
2103 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2104 is_bool = true;
2106 while (1)
2108 enum tree_code code;
2109 tree arg0, arg1, exp_type;
2110 tree nexp;
2111 location_t loc;
2113 if (exp != NULL_TREE)
2115 if (TREE_CODE (exp) != SSA_NAME
2116 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2117 break;
2119 stmt = SSA_NAME_DEF_STMT (exp);
2120 if (!is_gimple_assign (stmt))
2121 break;
2123 code = gimple_assign_rhs_code (stmt);
2124 arg0 = gimple_assign_rhs1 (stmt);
2125 arg1 = gimple_assign_rhs2 (stmt);
2126 exp_type = TREE_TYPE (exp);
2128 else
2130 code = gimple_cond_code (stmt);
2131 arg0 = gimple_cond_lhs (stmt);
2132 arg1 = gimple_cond_rhs (stmt);
2133 exp_type = boolean_type_node;
2136 if (TREE_CODE (arg0) != SSA_NAME)
2137 break;
2138 loc = gimple_location (stmt);
2139 switch (code)
2141 case BIT_NOT_EXPR:
2142 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2143 /* Ensure the range is either +[-,0], +[0,0],
2144 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2145 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2146 or similar expression of unconditional true or
2147 false, it should not be negated. */
2148 && ((high && integer_zerop (high))
2149 || (low && integer_onep (low))))
2151 in_p = !in_p;
2152 exp = arg0;
2153 continue;
2155 break;
2156 case SSA_NAME:
2157 exp = arg0;
2158 continue;
2159 CASE_CONVERT:
2160 if (is_bool)
2161 goto do_default;
2162 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2164 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2165 is_bool = true;
2166 else
2167 return;
2169 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2170 is_bool = true;
2171 goto do_default;
2172 case EQ_EXPR:
2173 case NE_EXPR:
2174 case LT_EXPR:
2175 case LE_EXPR:
2176 case GE_EXPR:
2177 case GT_EXPR:
2178 is_bool = true;
2179 /* FALLTHRU */
2180 default:
2181 if (!is_bool)
2182 return;
2183 do_default:
2184 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2185 &low, &high, &in_p,
2186 &strict_overflow_p);
2187 if (nexp != NULL_TREE)
2189 exp = nexp;
2190 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2191 continue;
2193 break;
2195 break;
2197 if (is_bool)
2199 r->exp = exp;
2200 r->in_p = in_p;
2201 r->low = low;
2202 r->high = high;
2203 r->strict_overflow_p = strict_overflow_p;
2207 /* Comparison function for qsort. Sort entries
2208 without SSA_NAME exp first, then with SSA_NAMEs sorted
2209 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2210 by increasing ->low and if ->low is the same, by increasing
2211 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2212 maximum. */
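/* E.g. for  x == 5 || x == 2 || (x >= 3 && x <= 4)  the three entries for
   x sort as [2, 2], [3, 4], [5, 5], so the merging loop in
   optimize_range_tests sees adjacent ranges and can collapse them into a
   single [2, 5] test.  (An illustrative sketch; entries without an
   SSA_NAME exp would still sort before these.)  */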
2214 static int
2215 range_entry_cmp (const void *a, const void *b)
2217 const struct range_entry *p = (const struct range_entry *) a;
2218 const struct range_entry *q = (const struct range_entry *) b;
2220 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2222 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2224 /* Group range_entries for the same SSA_NAME together. */
2225 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2226 return -1;
2227 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2228 return 1;
2229 /* If ->low is different, NULL low goes first, then by
2230 ascending low. */
2231 if (p->low != NULL_TREE)
2233 if (q->low != NULL_TREE)
2235 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2236 p->low, q->low);
2237 if (tem && integer_onep (tem))
2238 return -1;
2239 tem = fold_binary (GT_EXPR, boolean_type_node,
2240 p->low, q->low);
2241 if (tem && integer_onep (tem))
2242 return 1;
2244 else
2245 return 1;
2247 else if (q->low != NULL_TREE)
2248 return -1;
2249 /* If ->high is different, NULL high goes last, before that by
2250 ascending high. */
2251 if (p->high != NULL_TREE)
2253 if (q->high != NULL_TREE)
2255 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2256 p->high, q->high);
2257 if (tem && integer_onep (tem))
2258 return -1;
2259 tem = fold_binary (GT_EXPR, boolean_type_node,
2260 p->high, q->high);
2261 if (tem && integer_onep (tem))
2262 return 1;
2264 else
2265 return -1;
2267 else if (q->high != NULL_TREE)
2268 return 1;
2269 /* If both ranges are the same, sort below by ascending idx. */
2271 else
2272 return 1;
2274 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2275 return -1;
2277 if (p->idx < q->idx)
2278 return -1;
2279 else
2281 gcc_checking_assert (p->idx > q->idx);
2282 return 1;
2286 /* Helper routine of optimize_range_tests.
2287 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2288 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2289 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2290 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2291 an array of COUNT pointers to other ranges. Return
2292 true if the range merge has been successful.
2293 If OPCODE is ERROR_MARK, this is called from within
2294 maybe_optimize_range_tests and is performing inter-bb range optimization.
2295 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2296 oe->rank. */
2298 static bool
2299 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2300 struct range_entry **otherrangep,
2301 unsigned int count, enum tree_code opcode,
2302 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2303 bool in_p, tree low, tree high, bool strict_overflow_p)
2305 operand_entry *oe = (*ops)[range->idx];
2306 tree op = oe->op;
2307 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2308 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2309 location_t loc = gimple_location (stmt);
2310 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2311 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2312 in_p, low, high);
2313 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2314 gimple_stmt_iterator gsi;
2315 unsigned int i, uid;
2317 if (tem == NULL_TREE)
2318 return false;
2320 /* If op is a default def SSA_NAME, there is no place to insert the
2321 new comparison. Give up, unless we can use OP itself as the
2322 range test. */
2323 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2325 if (op == range->exp
2326 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2327 || TREE_CODE (optype) == BOOLEAN_TYPE)
2328 && (op == tem
2329 || (TREE_CODE (tem) == EQ_EXPR
2330 && TREE_OPERAND (tem, 0) == op
2331 && integer_onep (TREE_OPERAND (tem, 1))))
2332 && opcode != BIT_IOR_EXPR
2333 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2335 stmt = NULL;
2336 tem = op;
2338 else
2339 return false;
2342 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2343 warning_at (loc, OPT_Wstrict_overflow,
2344 "assuming signed overflow does not occur "
2345 "when simplifying range test");
2347 if (dump_file && (dump_flags & TDF_DETAILS))
2349 struct range_entry *r;
2350 fprintf (dump_file, "Optimizing range tests ");
2351 print_generic_expr (dump_file, range->exp, 0);
2352 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2353 print_generic_expr (dump_file, range->low, 0);
2354 fprintf (dump_file, ", ");
2355 print_generic_expr (dump_file, range->high, 0);
2356 fprintf (dump_file, "]");
2357 for (i = 0; i < count; i++)
2359 if (otherrange)
2360 r = otherrange + i;
2361 else
2362 r = otherrangep[i];
2363 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2364 print_generic_expr (dump_file, r->low, 0);
2365 fprintf (dump_file, ", ");
2366 print_generic_expr (dump_file, r->high, 0);
2367 fprintf (dump_file, "]");
2369 fprintf (dump_file, "\n into ");
2370 print_generic_expr (dump_file, tem, 0);
2371 fprintf (dump_file, "\n");
2374 if (opcode == BIT_IOR_EXPR
2375 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2376 tem = invert_truthvalue_loc (loc, tem);
2378 tem = fold_convert_loc (loc, optype, tem);
2379 if (stmt)
2381 gsi = gsi_for_stmt (stmt);
2382 uid = gimple_uid (stmt);
2384 else
2386 gsi = gsi_none ();
2387 uid = 0;
2389 if (stmt == NULL)
2390 gcc_checking_assert (tem == op);
2391 /* In rare cases range->exp can be equal to lhs of stmt.
2392 In that case we have to insert after the stmt rather than before
2393 it. If stmt is a PHI, insert it at the start of the basic block. */
2394 else if (op != range->exp)
2396 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2397 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2398 GSI_SAME_STMT);
2399 gsi_prev (&gsi);
2401 else if (gimple_code (stmt) != GIMPLE_PHI)
2403 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2404 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2405 GSI_CONTINUE_LINKING);
2407 else
2409 gsi = gsi_after_labels (gimple_bb (stmt));
2410 if (!gsi_end_p (gsi))
2411 uid = gimple_uid (gsi_stmt (gsi));
2412 else
2414 gsi = gsi_start_bb (gimple_bb (stmt));
2415 uid = 1;
2416 while (!gsi_end_p (gsi))
2418 uid = gimple_uid (gsi_stmt (gsi));
2419 gsi_next (&gsi);
2422 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2423 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2424 GSI_SAME_STMT);
2425 if (gsi_end_p (gsi))
2426 gsi = gsi_last_bb (gimple_bb (stmt));
2427 else
2428 gsi_prev (&gsi);
2430 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2431 if (gimple_uid (gsi_stmt (gsi)))
2432 break;
2433 else
2434 gimple_set_uid (gsi_stmt (gsi), uid);
2436 oe->op = tem;
2437 range->exp = exp;
2438 range->low = low;
2439 range->high = high;
2440 range->in_p = in_p;
2441 range->strict_overflow_p = false;
2443 for (i = 0; i < count; i++)
2445 if (otherrange)
2446 range = otherrange + i;
2447 else
2448 range = otherrangep[i];
2449 oe = (*ops)[range->idx];
2450 /* Now change all the other range test immediate uses, so that
2451 those tests will be optimized away. */
2452 if (opcode == ERROR_MARK)
2454 if (oe->op)
2455 oe->op = build_int_cst (TREE_TYPE (oe->op),
2456 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2457 else
2458 oe->op = (oe->rank == BIT_IOR_EXPR
2459 ? boolean_false_node : boolean_true_node);
2461 else
2462 oe->op = error_mark_node;
2463 range->exp = NULL_TREE;
2464 range->low = NULL_TREE;
2465 range->high = NULL_TREE;
2467 return true;
2470 /* Optimize X == CST1 || X == CST2
2471 if popcount (CST1 ^ CST2) == 1 into
2472 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2473 Similarly for ranges. E.g.
2474 X != 2 && X != 3 && X != 10 && X != 11
2475 will be transformed by the previous optimization into
2476 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2477 and this loop can transform that into
2478 !(((X & ~8) - 2U) <= 1U). */
2480 static bool
2481 optimize_range_tests_xor (enum tree_code opcode, tree type,
2482 tree lowi, tree lowj, tree highi, tree highj,
2483 vec<operand_entry *> *ops,
2484 struct range_entry *rangei,
2485 struct range_entry *rangej)
2487 tree lowxor, highxor, tem, exp;
2488 /* Check lowi ^ lowj == highi ^ highj and
2489 popcount (lowi ^ lowj) == 1. */
2490 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2491 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2492 return false;
2493 if (!integer_pow2p (lowxor))
2494 return false;
2495 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2496 if (!tree_int_cst_equal (lowxor, highxor))
2497 return false;
2499 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2500 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2501 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2502 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2503 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2504 NULL, rangei->in_p, lowj, highj,
2505 rangei->strict_overflow_p
2506 || rangej->strict_overflow_p))
2507 return true;
2508 return false;
2511 /* Optimize X == CST1 || X == CST2
2512 if popcount (CST2 - CST1) == 1 into
2513 ((X - CST1) & ~(CST2 - CST1)) == 0.
2514 Similarly for ranges. E.g.
2515 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2516 || X == 75 || X == 45
2517 will be transformed by the previous optimization into
2518 (X - 43U) <= 3U || (X - 75U) <= 3U
2519 and this loop can transform that into
2520 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2521 static bool
2522 optimize_range_tests_diff (enum tree_code opcode, tree type,
2523 tree lowi, tree lowj, tree highi, tree highj,
2524 vec<operand_entry *> *ops,
2525 struct range_entry *rangei,
2526 struct range_entry *rangej)
2528 tree tem1, tem2, mask;
2529 /* Check highi - lowi == highj - lowj. */
2530 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2531 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2532 return false;
2533 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2534 if (!tree_int_cst_equal (tem1, tem2))
2535 return false;
2536 /* Check popcount (lowj - lowi) == 1. */
2537 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2538 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2539 return false;
2540 if (!integer_pow2p (tem1))
2541 return false;
2543 type = unsigned_type_for (type);
2544 tem1 = fold_convert (type, tem1);
2545 tem2 = fold_convert (type, tem2);
2546 lowi = fold_convert (type, lowi);
2547 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2548 tem1 = fold_binary (MINUS_EXPR, type,
2549 fold_convert (type, rangei->exp), lowi);
2550 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2551 lowj = build_int_cst (type, 0);
2552 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2553 NULL, rangei->in_p, lowj, tem2,
2554 rangei->strict_overflow_p
2555 || rangej->strict_overflow_p))
2556 return true;
2557 return false;
2560 /* Perform the checks common to optimize_range_tests_xor and
2561 optimize_range_tests_diff.
2562 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2563 otherwise call optimize_range_tests_diff. */
2565 static bool
2566 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2567 bool optimize_xor, vec<operand_entry *> *ops,
2568 struct range_entry *ranges)
2570 int i, j;
2571 bool any_changes = false;
2572 for (i = first; i < length; i++)
2574 tree lowi, highi, lowj, highj, type, tem;
2576 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2577 continue;
2578 type = TREE_TYPE (ranges[i].exp);
2579 if (!INTEGRAL_TYPE_P (type))
2580 continue;
2581 lowi = ranges[i].low;
2582 if (lowi == NULL_TREE)
2583 lowi = TYPE_MIN_VALUE (type);
2584 highi = ranges[i].high;
2585 if (highi == NULL_TREE)
2586 continue;
2587 for (j = i + 1; j < length && j < i + 64; j++)
2589 bool changes;
2590 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2591 continue;
2592 lowj = ranges[j].low;
2593 if (lowj == NULL_TREE)
2594 continue;
2595 highj = ranges[j].high;
2596 if (highj == NULL_TREE)
2597 highj = TYPE_MAX_VALUE (type);
2598 /* Check lowj > highi. */
2599 tem = fold_binary (GT_EXPR, boolean_type_node,
2600 lowj, highi);
2601 if (tem == NULL_TREE || !integer_onep (tem))
2602 continue;
2603 if (optimize_xor)
2604 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2605 highi, highj, ops,
2606 ranges + i, ranges + j);
2607 else
2608 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2609 highi, highj, ops,
2610 ranges + i, ranges + j);
2611 if (changes)
2613 any_changes = true;
2614 break;
2618 return any_changes;
2621 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2622 range, EXP, LOW, HIGH, compute a bit mask of the bits to test and return
2623 EXP on success, NULL otherwise. */
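/* E.g. with PREC 64, TOTALLOW 43, LOW 43 and HIGH 46 the resulting mask is
   0xf; a second call for the same EXP with LOW 75 and HIGH 78 yields
   0xf << 32, so the caller can OR the masks of compatible ranges together.
   (A sketch of the simple case; the BIT_AND_EXPR/PLUS_EXPR handling below
   may rewrite EXP and *TOTALLOWP.)  */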
2625 static tree
2626 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2627 wide_int *mask, tree *totallowp)
2629 tree tem = int_const_binop (MINUS_EXPR, high, low);
2630 if (tem == NULL_TREE
2631 || TREE_CODE (tem) != INTEGER_CST
2632 || TREE_OVERFLOW (tem)
2633 || tree_int_cst_sgn (tem) == -1
2634 || compare_tree_int (tem, prec) != -1)
2635 return NULL_TREE;
2637 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2638 *mask = wi::shifted_mask (0, max, false, prec);
2639 if (TREE_CODE (exp) == BIT_AND_EXPR
2640 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2642 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2643 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2644 if (wi::popcount (msk) == 1
2645 && wi::ltu_p (msk, prec - max))
2647 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2648 max += msk.to_uhwi ();
2649 exp = TREE_OPERAND (exp, 0);
2650 if (integer_zerop (low)
2651 && TREE_CODE (exp) == PLUS_EXPR
2652 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2654 tree ret = TREE_OPERAND (exp, 0);
2655 STRIP_NOPS (ret);
2656 widest_int bias
2657 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2658 TYPE_PRECISION (TREE_TYPE (low))));
2659 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2660 if (totallowp)
2662 *totallowp = tbias;
2663 return ret;
2665 else if (!tree_int_cst_lt (totallow, tbias))
2666 return NULL_TREE;
2667 bias = wi::to_widest (tbias);
2668 bias -= wi::to_widest (totallow);
2669 if (bias >= 0 && bias < prec - max)
2671 *mask = wi::lshift (*mask, bias);
2672 return ret;
2677 if (totallowp)
2678 return exp;
2679 if (!tree_int_cst_lt (totallow, low))
2680 return exp;
2681 tem = int_const_binop (MINUS_EXPR, low, totallow);
2682 if (tem == NULL_TREE
2683 || TREE_CODE (tem) != INTEGER_CST
2684 || TREE_OVERFLOW (tem)
2685 || compare_tree_int (tem, prec - max) == 1)
2686 return NULL_TREE;
2688 *mask = wi::lshift (*mask, wi::to_widest (tem));
2689 return exp;
2692 /* Attempt to optimize small range tests using a bit test.
2693 E.g.
2694 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2695 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2696 has been optimized by the earlier optimizations into:
2697 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2698 As the whole 43 through 82 range spans fewer than 64 values,
2699 for 64-bit word targets optimize that into:
2700 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2702 static bool
2703 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2704 vec<operand_entry *> *ops,
2705 struct range_entry *ranges)
2707 int i, j;
2708 bool any_changes = false;
2709 int prec = GET_MODE_BITSIZE (word_mode);
2710 auto_vec<struct range_entry *, 64> candidates;
2712 for (i = first; i < length - 2; i++)
2714 tree lowi, highi, lowj, highj, type;
2716 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2717 continue;
2718 type = TREE_TYPE (ranges[i].exp);
2719 if (!INTEGRAL_TYPE_P (type))
2720 continue;
2721 lowi = ranges[i].low;
2722 if (lowi == NULL_TREE)
2723 lowi = TYPE_MIN_VALUE (type);
2724 highi = ranges[i].high;
2725 if (highi == NULL_TREE)
2726 continue;
2727 wide_int mask;
2728 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2729 highi, &mask, &lowi);
2730 if (exp == NULL_TREE)
2731 continue;
2732 bool strict_overflow_p = ranges[i].strict_overflow_p;
2733 candidates.truncate (0);
2734 int end = MIN (i + 64, length);
2735 for (j = i + 1; j < end; j++)
2737 tree exp2;
2738 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2739 continue;
2740 if (ranges[j].exp == exp)
2742 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2744 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2745 if (exp2 == exp)
2747 else if (TREE_CODE (exp2) == PLUS_EXPR)
2749 exp2 = TREE_OPERAND (exp2, 0);
2750 STRIP_NOPS (exp2);
2751 if (exp2 != exp)
2752 continue;
2754 else
2755 continue;
2757 else
2758 continue;
2759 lowj = ranges[j].low;
2760 if (lowj == NULL_TREE)
2761 continue;
2762 highj = ranges[j].high;
2763 if (highj == NULL_TREE)
2764 highj = TYPE_MAX_VALUE (type);
2765 wide_int mask2;
2766 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2767 highj, &mask2, NULL);
2768 if (exp2 != exp)
2769 continue;
2770 mask |= mask2;
2771 strict_overflow_p |= ranges[j].strict_overflow_p;
2772 candidates.safe_push (&ranges[j]);
2775 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2776 if (candidates.length () >= 2)
2778 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2779 wi::to_widest (lowi)
2780 + prec - 1 - wi::clz (mask));
2781 operand_entry *oe = (*ops)[ranges[i].idx];
2782 tree op = oe->op;
2783 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2784 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2785 location_t loc = gimple_location (stmt);
2786 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2788 /* See if it isn't cheaper to pretend the minimum value of the
2789 range is 0, if the maximum value is small enough.
2790 We can then avoid the subtraction of the minimum value, but the
2791 mask constant could perhaps be more expensive. */
2792 if (compare_tree_int (lowi, 0) > 0
2793 && compare_tree_int (high, prec) < 0)
2795 int cost_diff;
2796 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2797 rtx reg = gen_raw_REG (word_mode, 10000);
2798 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2799 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2800 GEN_INT (-m)), speed_p);
2801 rtx r = immed_wide_int_const (mask, word_mode);
2802 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2803 word_mode, speed_p);
2804 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2805 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2806 word_mode, speed_p);
2807 if (cost_diff > 0)
2809 mask = wi::lshift (mask, m);
2810 lowi = build_zero_cst (TREE_TYPE (lowi));
2814 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2815 false, lowi, high);
2816 if (tem == NULL_TREE || is_gimple_val (tem))
2817 continue;
2818 tree etype = unsigned_type_for (TREE_TYPE (exp));
2819 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2820 fold_convert_loc (loc, etype, exp),
2821 fold_convert_loc (loc, etype, lowi));
2822 exp = fold_convert_loc (loc, integer_type_node, exp);
2823 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2824 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2825 build_int_cst (word_type, 1), exp);
2826 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2827 wide_int_to_tree (word_type, mask));
2828 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2829 build_zero_cst (word_type));
2830 if (is_gimple_val (exp))
2831 continue;
2833 /* The shift might have undefined behavior if TEM is true,
2834 but reassociate_bb isn't prepared to have basic blocks
2835 split when it is running. So, temporarily emit a code
2836 with BIT_IOR_EXPR instead of &&, and fix it up in
2837 branch_fixup. */
2838 gimple_seq seq;
2839 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2840 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2841 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2842 gimple_seq seq2;
2843 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2844 gimple_seq_add_seq_without_update (&seq, seq2);
2845 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2846 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2847 gimple *g = gimple_build_assign (make_ssa_name (optype),
2848 BIT_IOR_EXPR, tem, exp);
2849 gimple_set_location (g, loc);
2850 gimple_seq_add_stmt_without_update (&seq, g);
2851 exp = gimple_assign_lhs (g);
2852 tree val = build_zero_cst (optype);
2853 if (update_range_test (&ranges[i], NULL, candidates.address (),
2854 candidates.length (), opcode, ops, exp,
2855 seq, false, val, val, strict_overflow_p))
2857 any_changes = true;
2858 reassoc_branch_fixups.safe_push (tem);
2860 else
2861 gimple_seq_discard (seq);
2864 return any_changes;
2867 /* Attempt to optimize for signed a and b where b is known to be >= 0:
2868 a >= 0 && a < b into (unsigned) a < (unsigned) b
2869 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
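/* A typical source-level instance (a sketch, not an exhaustive list) is
   the bounds check
        if (i >= 0 && i < len)
   with signed i and len: when get_nonzero_bits shows len is non-negative,
   the two tests are replaced by a single
        if ((unsigned) i < (unsigned) len)
   comparison.  */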
2871 static bool
2872 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
2873 vec<operand_entry *> *ops,
2874 struct range_entry *ranges)
2876 int i;
2877 bool any_changes = false;
2878 hash_map<tree, int> *map = NULL;
2880 for (i = first; i < length; i++)
2882 if (ranges[i].exp == NULL_TREE
2883 || TREE_CODE (ranges[i].exp) != SSA_NAME
2884 || !ranges[i].in_p)
2885 continue;
2887 tree type = TREE_TYPE (ranges[i].exp);
2888 if (!INTEGRAL_TYPE_P (type)
2889 || TYPE_UNSIGNED (type)
2890 || ranges[i].low == NULL_TREE
2891 || !integer_zerop (ranges[i].low)
2892 || ranges[i].high != NULL_TREE)
2893 continue;
2894 /* EXP >= 0 here. */
2895 if (map == NULL)
2896 map = new hash_map <tree, int>;
2897 map->put (ranges[i].exp, i);
2900 if (map == NULL)
2901 return false;
2903 for (i = 0; i < length; i++)
2905 if (ranges[i].low == NULL_TREE
2906 || ranges[i].high == NULL_TREE
2907 || !integer_zerop (ranges[i].low)
2908 || !integer_zerop (ranges[i].high))
2909 continue;
2911 gimple *stmt;
2912 tree_code ccode;
2913 tree rhs1, rhs2;
2914 if (ranges[i].exp)
2916 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
2917 continue;
2918 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
2919 if (!is_gimple_assign (stmt))
2920 continue;
2921 ccode = gimple_assign_rhs_code (stmt);
2922 rhs1 = gimple_assign_rhs1 (stmt);
2923 rhs2 = gimple_assign_rhs2 (stmt);
2925 else
2927 operand_entry *oe = (*ops)[ranges[i].idx];
2928 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2929 if (gimple_code (stmt) != GIMPLE_COND)
2930 continue;
2931 ccode = gimple_cond_code (stmt);
2932 rhs1 = gimple_cond_lhs (stmt);
2933 rhs2 = gimple_cond_rhs (stmt);
2936 if (TREE_CODE (rhs1) != SSA_NAME
2937 || rhs2 == NULL_TREE
2938 || TREE_CODE (rhs2) != SSA_NAME)
2939 continue;
2941 switch (ccode)
2943 case GT_EXPR:
2944 case GE_EXPR:
2945 if (!ranges[i].in_p)
2946 std::swap (rhs1, rhs2);
2947 ccode = swap_tree_comparison (ccode);
2948 break;
2949 case LT_EXPR:
2950 case LE_EXPR:
2951 if (ranges[i].in_p)
2952 std::swap (rhs1, rhs2);
2953 break;
2954 default:
2955 continue;
2958 int *idx = map->get (rhs1);
2959 if (idx == NULL)
2960 continue;
2962 wide_int nz = get_nonzero_bits (rhs2);
2963 if (wi::neg_p (nz))
2964 continue;
2966 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
2967 and RHS2 is known to be RHS2 >= 0. */
2968 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
2970 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2971 if ((ranges[*idx].strict_overflow_p
2972 || ranges[i].strict_overflow_p)
2973 && issue_strict_overflow_warning (wc))
2974 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
2975 "assuming signed overflow does not occur "
2976 "when simplifying range test");
2978 if (dump_file && (dump_flags & TDF_DETAILS))
2980 struct range_entry *r = &ranges[*idx];
2981 fprintf (dump_file, "Optimizing range test ");
2982 print_generic_expr (dump_file, r->exp, 0);
2983 fprintf (dump_file, " +[");
2984 print_generic_expr (dump_file, r->low, 0);
2985 fprintf (dump_file, ", ");
2986 print_generic_expr (dump_file, r->high, 0);
2987 fprintf (dump_file, "] and comparison ");
2988 print_generic_expr (dump_file, rhs1, 0);
2989 fprintf (dump_file, " %s ", op_symbol_code (ccode));
2990 print_generic_expr (dump_file, rhs2, 0);
2991 fprintf (dump_file, "\n into (");
2992 print_generic_expr (dump_file, utype, 0);
2993 fprintf (dump_file, ") ");
2994 print_generic_expr (dump_file, rhs1, 0);
2995 fprintf (dump_file, " %s (", op_symbol_code (ccode));
2996 print_generic_expr (dump_file, utype, 0);
2997 fprintf (dump_file, ") ");
2998 print_generic_expr (dump_file, rhs2, 0);
2999 fprintf (dump_file, "\n");
3002 if (ranges[i].in_p)
3003 std::swap (rhs1, rhs2);
3005 unsigned int uid = gimple_uid (stmt);
3006 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3007 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3008 gimple_set_uid (g, uid);
3009 rhs1 = gimple_assign_lhs (g);
3010 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3011 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3012 gimple_set_uid (g, uid);
3013 rhs2 = gimple_assign_lhs (g);
3014 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3015 if (tree_swap_operands_p (rhs1, rhs2))
3017 std::swap (rhs1, rhs2);
3018 ccode = swap_tree_comparison (ccode);
3020 if (gimple_code (stmt) == GIMPLE_COND)
3022 gcond *c = as_a <gcond *> (stmt);
3023 gimple_cond_set_code (c, ccode);
3024 gimple_cond_set_lhs (c, rhs1);
3025 gimple_cond_set_rhs (c, rhs2);
3026 update_stmt (stmt);
3028 else
3030 operand_entry *oe = (*ops)[ranges[i].idx];
3031 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3032 if (!INTEGRAL_TYPE_P (ctype)
3033 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3034 && TYPE_PRECISION (ctype) != 1))
3035 ctype = boolean_type_node;
3036 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3037 gimple_set_uid (g, uid);
3038 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3039 if (oe->op && ctype != TREE_TYPE (oe->op))
3041 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3042 NOP_EXPR, gimple_assign_lhs (g));
3043 gimple_set_uid (g, uid);
3044 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3046 ranges[i].exp = gimple_assign_lhs (g);
3047 oe->op = ranges[i].exp;
3048 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3049 ranges[i].high = ranges[i].low;
3051 ranges[i].strict_overflow_p = false;
3052 operand_entry *oe = (*ops)[ranges[*idx].idx];
3053 /* Now change all the other range test immediate uses, so that
3054 those tests will be optimized away. */
3055 if (opcode == ERROR_MARK)
3057 if (oe->op)
3058 oe->op = build_int_cst (TREE_TYPE (oe->op),
3059 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3060 else
3061 oe->op = (oe->rank == BIT_IOR_EXPR
3062 ? boolean_false_node : boolean_true_node);
3064 else
3065 oe->op = error_mark_node;
3066 ranges[*idx].exp = NULL_TREE;
3067 ranges[*idx].low = NULL_TREE;
3068 ranges[*idx].high = NULL_TREE;
3069 any_changes = true;
3072 delete map;
3073 return any_changes;
3076 /* Optimize range tests, similarly to how fold_range_test optimizes
3077 them on trees. The tree code for the binary
3078 operation between all the operands is OPCODE.
3079 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3080 maybe_optimize_range_tests for inter-bb range optimization.
3081 In that case if oe->op is NULL, oe->id is the index of the bb whose
3082 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3083 the actual opcode. */
3085 static bool
3086 optimize_range_tests (enum tree_code opcode,
3087 vec<operand_entry *> *ops)
3089 unsigned int length = ops->length (), i, j, first;
3090 operand_entry *oe;
3091 struct range_entry *ranges;
3092 bool any_changes = false;
3094 if (length == 1)
3095 return false;
3097 ranges = XNEWVEC (struct range_entry, length);
3098 for (i = 0; i < length; i++)
3100 oe = (*ops)[i];
3101 ranges[i].idx = i;
3102 init_range_entry (ranges + i, oe->op,
3103 oe->op
3104 ? NULL
3105 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3106 /* For | invert it now, we will invert it again before emitting
3107 the optimized expression. */
3108 if (opcode == BIT_IOR_EXPR
3109 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3110 ranges[i].in_p = !ranges[i].in_p;
3113 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3114 for (i = 0; i < length; i++)
3115 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3116 break;
3118 /* Try to merge ranges. */
3119 for (first = i; i < length; i++)
3121 tree low = ranges[i].low;
3122 tree high = ranges[i].high;
3123 int in_p = ranges[i].in_p;
3124 bool strict_overflow_p = ranges[i].strict_overflow_p;
3125 int update_fail_count = 0;
3127 for (j = i + 1; j < length; j++)
3129 if (ranges[i].exp != ranges[j].exp)
3130 break;
3131 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3132 ranges[j].in_p, ranges[j].low, ranges[j].high))
3133 break;
3134 strict_overflow_p |= ranges[j].strict_overflow_p;
3137 if (j == i + 1)
3138 continue;
3140 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3141 opcode, ops, ranges[i].exp, NULL, in_p,
3142 low, high, strict_overflow_p))
3144 i = j - 1;
3145 any_changes = true;
3147 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3148 while update_range_test would fail. */
3149 else if (update_fail_count == 64)
3150 i = j - 1;
3151 else
3152 ++update_fail_count;
3155 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3156 ops, ranges);
3158 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3159 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3160 ops, ranges);
3161 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3162 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3163 ops, ranges);
3164 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3165 ranges);
3167 if (any_changes && opcode != ERROR_MARK)
3169 j = 0;
3170 FOR_EACH_VEC_ELT (*ops, i, oe)
3172 if (oe->op == error_mark_node)
3173 continue;
3174 else if (i != j)
3175 (*ops)[j] = oe;
3176 j++;
3178 ops->truncate (j);
3181 XDELETEVEC (ranges);
3182 return any_changes;
3185 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3186 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3187 otherwise the comparison code. */
3189 static tree_code
3190 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3192 if (TREE_CODE (var) != SSA_NAME)
3193 return ERROR_MARK;
3195 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3196 if (stmt == NULL)
3197 return ERROR_MARK;
3199 /* ??? If we start creating more COND_EXPR, we could perform
3200 this same optimization with them. For now, simplify. */
3201 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3202 return ERROR_MARK;
3204 tree cond = gimple_assign_rhs1 (stmt);
3205 tree_code cmp = TREE_CODE (cond);
3206 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3207 return ERROR_MARK;
3209 /* ??? For now, allow only canonical true and false result vectors.
3210 We could expand this to other constants should the need arise,
3211 but at the moment we don't create them. */
3212 tree t = gimple_assign_rhs2 (stmt);
3213 tree f = gimple_assign_rhs3 (stmt);
3214 bool inv;
3215 if (integer_all_onesp (t))
3216 inv = false;
3217 else if (integer_all_onesp (f))
3219 cmp = invert_tree_comparison (cmp, false);
3220 inv = true;
3222 else
3223 return ERROR_MARK;
3224 if (!integer_zerop (f))
3225 return ERROR_MARK;
3227 /* Success! */
3228 if (rets)
3229 *rets = stmt;
3230 if (reti)
3231 *reti = inv;
3232 return cmp;
3235 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3236 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
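/* E.g. given (a sketch, assuming the fold of the two comparisons succeeds)
        c_1 = VEC_COND_EXPR <a < b, { -1, ... }, { 0, ... }>;
        c_2 = VEC_COND_EXPR <a == b, { -1, ... }, { 0, ... }>;
        x_3 = c_1 | c_2;
   maybe_fold_or_comparisons can combine the two conditions into a <= b;
   the first VEC_COND_EXPR's condition is replaced and the second entry is
   dropped from the ops list.  */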
3238 static bool
3239 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3241 unsigned int length = ops->length (), i, j;
3242 bool any_changes = false;
3244 if (length == 1)
3245 return false;
3247 for (i = 0; i < length; ++i)
3249 tree elt0 = (*ops)[i]->op;
3251 gassign *stmt0;
3252 bool invert;
3253 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3254 if (cmp0 == ERROR_MARK)
3255 continue;
3257 for (j = i + 1; j < length; ++j)
3259 tree &elt1 = (*ops)[j]->op;
3261 gassign *stmt1;
3262 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3263 if (cmp1 == ERROR_MARK)
3264 continue;
3266 tree cond0 = gimple_assign_rhs1 (stmt0);
3267 tree x0 = TREE_OPERAND (cond0, 0);
3268 tree y0 = TREE_OPERAND (cond0, 1);
3270 tree cond1 = gimple_assign_rhs1 (stmt1);
3271 tree x1 = TREE_OPERAND (cond1, 0);
3272 tree y1 = TREE_OPERAND (cond1, 1);
3274 tree comb;
3275 if (opcode == BIT_AND_EXPR)
3276 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3277 else if (opcode == BIT_IOR_EXPR)
3278 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3279 else
3280 gcc_unreachable ();
3281 if (comb == NULL)
3282 continue;
3284 /* Success! */
3285 if (dump_file && (dump_flags & TDF_DETAILS))
3287 fprintf (dump_file, "Transforming ");
3288 print_generic_expr (dump_file, cond0, 0);
3289 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3290 print_generic_expr (dump_file, cond1, 0);
3291 fprintf (dump_file, " into ");
3292 print_generic_expr (dump_file, comb, 0);
3293 fputc ('\n', dump_file);
3296 gimple_assign_set_rhs1 (stmt0, comb);
3297 if (invert)
3298 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3299 *gimple_assign_rhs3_ptr (stmt0));
3300 update_stmt (stmt0);
3302 elt1 = error_mark_node;
3303 any_changes = true;
3307 if (any_changes)
3309 operand_entry *oe;
3310 j = 0;
3311 FOR_EACH_VEC_ELT (*ops, i, oe)
3313 if (oe->op == error_mark_node)
3314 continue;
3315 else if (i != j)
3316 (*ops)[j] = oe;
3317 j++;
3319 ops->truncate (j);
3322 return any_changes;
3325 /* Return true if STMT is a cast like:
3326 <bb N>:
3328 _123 = (int) _234;
3330 <bb M>:
3331 # _345 = PHI <_123(N), 1(...), 1(...)>
3332 where _234 has bool type, _123 has a single use and
3333 bb N has a single successor M. This is commonly used in
3334 the last block of a range test.
3336 Also return true if STMT is a tcc_comparison like:
3337 <bb N>:
3339 _234 = a_2(D) == 2;
3341 <bb M>:
3342 # _345 = PHI <_234(N), 1(...), 1(...)>
3343 _346 = (int) _345;
3344 where _234 has bool type, a single use and
3345 bb N has a single successor M. This is commonly used in
3346 the last block of a range test. */
3348 static bool
3349 final_range_test_p (gimple *stmt)
3351 basic_block bb, rhs_bb, lhs_bb;
3352 edge e;
3353 tree lhs, rhs;
3354 use_operand_p use_p;
3355 gimple *use_stmt;
3357 if (!gimple_assign_cast_p (stmt)
3358 && (!is_gimple_assign (stmt)
3359 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3360 != tcc_comparison)))
3361 return false;
3362 bb = gimple_bb (stmt);
3363 if (!single_succ_p (bb))
3364 return false;
3365 e = single_succ_edge (bb);
3366 if (e->flags & EDGE_COMPLEX)
3367 return false;
3369 lhs = gimple_assign_lhs (stmt);
3370 rhs = gimple_assign_rhs1 (stmt);
3371 if (gimple_assign_cast_p (stmt)
3372 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3373 || TREE_CODE (rhs) != SSA_NAME
3374 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3375 return false;
3377 if (!gimple_assign_cast_p (stmt)
3378 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3379 return false;
3381 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3382 if (!single_imm_use (lhs, &use_p, &use_stmt))
3383 return false;
3385 if (gimple_code (use_stmt) != GIMPLE_PHI
3386 || gimple_bb (use_stmt) != e->dest)
3387 return false;
3389 /* And that the rhs is defined in the same loop. */
3390 if (gimple_assign_cast_p (stmt))
3392 if (TREE_CODE (rhs) != SSA_NAME
3393 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3394 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3395 return false;
3397 else
3399 if (TREE_CODE (lhs) != SSA_NAME
3400 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3401 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3402 return false;
3405 return true;
3408 /* Return true if BB is suitable basic block for inter-bb range test
3409 optimization. If BACKWARD is true, BB should be the only predecessor
3410 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3411 or compared with to find a common basic block to which all conditions
3412 branch if true resp. false. If BACKWARD is false, TEST_BB should
3413 be the only predecessor of BB. */
3415 static bool
3416 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3417 bool backward)
3419 edge_iterator ei, ei2;
3420 edge e, e2;
3421 gimple *stmt;
3422 gphi_iterator gsi;
3423 bool other_edge_seen = false;
3424 bool is_cond;
3426 if (test_bb == bb)
3427 return false;
3428 /* Check last stmt first. */
3429 stmt = last_stmt (bb);
3430 if (stmt == NULL
3431 || (gimple_code (stmt) != GIMPLE_COND
3432 && (backward || !final_range_test_p (stmt)))
3433 || gimple_visited_p (stmt)
3434 || stmt_could_throw_p (stmt)
3435 || *other_bb == bb)
3436 return false;
3437 is_cond = gimple_code (stmt) == GIMPLE_COND;
3438 if (is_cond)
3440 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3441 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3442 to *OTHER_BB (if not set yet, try to find it out). */
3443 if (EDGE_COUNT (bb->succs) != 2)
3444 return false;
3445 FOR_EACH_EDGE (e, ei, bb->succs)
3447 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3448 return false;
3449 if (e->dest == test_bb)
3451 if (backward)
3452 continue;
3453 else
3454 return false;
3456 if (e->dest == bb)
3457 return false;
3458 if (*other_bb == NULL)
3460 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3461 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3462 return false;
3463 else if (e->dest == e2->dest)
3464 *other_bb = e->dest;
3465 if (*other_bb == NULL)
3466 return false;
3468 if (e->dest == *other_bb)
3469 other_edge_seen = true;
3470 else if (backward)
3471 return false;
3473 if (*other_bb == NULL || !other_edge_seen)
3474 return false;
3476 else if (single_succ (bb) != *other_bb)
3477 return false;
3479 /* Now check all PHIs of *OTHER_BB. */
3480 e = find_edge (bb, *other_bb);
3481 e2 = find_edge (test_bb, *other_bb);
3482 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3484 gphi *phi = gsi.phi ();
3485 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3486 corresponding to BB and TEST_BB predecessor must be the same. */
3487 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3488 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3490 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3491 one of the PHIs should have the lhs of the last stmt in
3492 that block as PHI arg and that PHI should have 0 or 1
3493 corresponding to it in all other range test basic blocks
3494 considered. */
3495 if (!is_cond)
3497 if (gimple_phi_arg_def (phi, e->dest_idx)
3498 == gimple_assign_lhs (stmt)
3499 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3500 || integer_onep (gimple_phi_arg_def (phi,
3501 e2->dest_idx))))
3502 continue;
3504 else
3506 gimple *test_last = last_stmt (test_bb);
3507 if (gimple_code (test_last) != GIMPLE_COND
3508 && gimple_phi_arg_def (phi, e2->dest_idx)
3509 == gimple_assign_lhs (test_last)
3510 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3511 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3512 continue;
3515 return false;
3518 return true;
3521 /* Return true if BB doesn't have side-effects that would disallow
3522 range test optimization, all SSA_NAMEs set in the bb are consumed
3523 in the bb and there are no PHIs. */
3525 static bool
3526 no_side_effect_bb (basic_block bb)
3528 gimple_stmt_iterator gsi;
3529 gimple *last;
3531 if (!gimple_seq_empty_p (phi_nodes (bb)))
3532 return false;
3533 last = last_stmt (bb);
3534 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3536 gimple *stmt = gsi_stmt (gsi);
3537 tree lhs;
3538 imm_use_iterator imm_iter;
3539 use_operand_p use_p;
3541 if (is_gimple_debug (stmt))
3542 continue;
3543 if (gimple_has_side_effects (stmt))
3544 return false;
3545 if (stmt == last)
3546 return true;
3547 if (!is_gimple_assign (stmt))
3548 return false;
3549 lhs = gimple_assign_lhs (stmt);
3550 if (TREE_CODE (lhs) != SSA_NAME)
3551 return false;
3552 if (gimple_assign_rhs_could_trap_p (stmt))
3553 return false;
3554 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3556 gimple *use_stmt = USE_STMT (use_p);
3557 if (is_gimple_debug (use_stmt))
3558 continue;
3559 if (gimple_bb (use_stmt) != bb)
3560 return false;
3563 return false;
3566 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3567 return true and fill in *OPS recursively. */
3569 static bool
3570 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3571 struct loop *loop)
3573 gimple *stmt = SSA_NAME_DEF_STMT (var);
3574 tree rhs[2];
3575 int i;
3577 if (!is_reassociable_op (stmt, code, loop))
3578 return false;
3580 rhs[0] = gimple_assign_rhs1 (stmt);
3581 rhs[1] = gimple_assign_rhs2 (stmt);
3582 gimple_set_visited (stmt, true);
3583 for (i = 0; i < 2; i++)
3584 if (TREE_CODE (rhs[i]) == SSA_NAME
3585 && !get_ops (rhs[i], code, ops, loop)
3586 && has_single_use (rhs[i]))
3588 operand_entry *oe = operand_entry_pool.allocate ();
3590 oe->op = rhs[i];
3591 oe->rank = code;
3592 oe->id = 0;
3593 oe->count = 1;
3594 oe->stmt_to_insert = NULL;
3595 ops->safe_push (oe);
3597 return true;
3600 /* Find the ops that were added by get_ops starting from VAR, see if
3601 they were changed during update_range_test and if so, create new
3602 stmts. */
3604 static tree
3605 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3606 unsigned int *pidx, struct loop *loop)
3608 gimple *stmt = SSA_NAME_DEF_STMT (var);
3609 tree rhs[4];
3610 int i;
3612 if (!is_reassociable_op (stmt, code, loop))
3613 return NULL;
3615 rhs[0] = gimple_assign_rhs1 (stmt);
3616 rhs[1] = gimple_assign_rhs2 (stmt);
3617 rhs[2] = rhs[0];
3618 rhs[3] = rhs[1];
3619 for (i = 0; i < 2; i++)
3620 if (TREE_CODE (rhs[i]) == SSA_NAME)
3622 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3623 if (rhs[2 + i] == NULL_TREE)
3625 if (has_single_use (rhs[i]))
3626 rhs[2 + i] = ops[(*pidx)++]->op;
3627 else
3628 rhs[2 + i] = rhs[i];
3631 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3632 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3634 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3635 var = make_ssa_name (TREE_TYPE (var));
3636 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3637 rhs[2], rhs[3]);
3638 gimple_set_uid (g, gimple_uid (stmt));
3639 gimple_set_visited (g, true);
3640 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3642 return var;
3645 /* Structure to track the initial value passed to get_ops and
3646 the range in the ops vector for each basic block. */
3648 struct inter_bb_range_test_entry
3650 tree op;
3651 unsigned int first_idx, last_idx;
3654 /* Inter-bb range test optimization.
3656 Return TRUE if a gimple conditional is optimized to a constant
3657 true/false, otherwise return FALSE.
3659 This indicates to the caller that it should run a CFG cleanup pass
3660 once reassociation is completed. */
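/* E.g. for
        if (a != 2) if (a != 3) if (a != 5) foo ();
   each comparison ends up as a GIMPLE_COND in its own basic block, all
   failing edges meeting in the same destination.  The routine gathers the
   tests of first_bb .. last_bb into one ops vector, lets
   optimize_range_tests rewrite them, and turns the GIMPLE_CONDs that
   became redundant into constant-true or constant-false conditions, which
   is why the caller may need a CFG cleanup afterwards.  (An illustrative
   sketch of the typical shape; see suitable_cond_bb for the exact
   constraints.)  */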
3662 static bool
3663 maybe_optimize_range_tests (gimple *stmt)
3665 basic_block first_bb = gimple_bb (stmt);
3666 basic_block last_bb = first_bb;
3667 basic_block other_bb = NULL;
3668 basic_block bb;
3669 edge_iterator ei;
3670 edge e;
3671 auto_vec<operand_entry *> ops;
3672 auto_vec<inter_bb_range_test_entry> bbinfo;
3673 bool any_changes = false;
3674 bool cfg_cleanup_needed = false;
3676 /* Consider only basic blocks that end with GIMPLE_COND or
3677 a cast statement satisfying final_range_test_p. All
3678 but the last bb in the first_bb .. last_bb range
3679 should end with GIMPLE_COND. */
3680 if (gimple_code (stmt) == GIMPLE_COND)
3682 if (EDGE_COUNT (first_bb->succs) != 2)
3683 return cfg_cleanup_needed;
3685 else if (final_range_test_p (stmt))
3686 other_bb = single_succ (first_bb);
3687 else
3688 return cfg_cleanup_needed;
3690 if (stmt_could_throw_p (stmt))
3691 return cfg_cleanup_needed;
3693 /* As relative ordering of post-dominator sons isn't fixed,
3694 maybe_optimize_range_tests can be called first on any
3695 bb in the range we want to optimize. So, start searching
3696 backwards, if first_bb can be set to a predecessor. */
3697 while (single_pred_p (first_bb))
3699 basic_block pred_bb = single_pred (first_bb);
3700 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3701 break;
3702 if (!no_side_effect_bb (first_bb))
3703 break;
3704 first_bb = pred_bb;
3706 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3707 Before starting forward search in last_bb successors, find
3708 out the other_bb. */
3709 if (first_bb == last_bb)
3711 other_bb = NULL;
3712 /* As non-GIMPLE_COND last stmt always terminates the range,
3713 if forward search didn't discover anything, just give up. */
3714 if (gimple_code (stmt) != GIMPLE_COND)
3715 return cfg_cleanup_needed;
3716 /* Look at both successors. Either it ends with a GIMPLE_COND
3717 and satisfies suitable_cond_bb, or ends with a cast and
3718 other_bb is that cast's successor. */
3719 FOR_EACH_EDGE (e, ei, first_bb->succs)
3720 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3721 || e->dest == first_bb)
3722 return cfg_cleanup_needed;
3723 else if (single_pred_p (e->dest))
3725 stmt = last_stmt (e->dest);
3726 if (stmt
3727 && gimple_code (stmt) == GIMPLE_COND
3728 && EDGE_COUNT (e->dest->succs) == 2)
3730 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3731 break;
3732 else
3733 other_bb = NULL;
3735 else if (stmt
3736 && final_range_test_p (stmt)
3737 && find_edge (first_bb, single_succ (e->dest)))
3739 other_bb = single_succ (e->dest);
3740 if (other_bb == first_bb)
3741 other_bb = NULL;
3744 if (other_bb == NULL)
3745 return cfg_cleanup_needed;
3747 /* Now do the forward search, moving last_bb to successor bbs
3748 that aren't other_bb. */
3749 while (EDGE_COUNT (last_bb->succs) == 2)
3751 FOR_EACH_EDGE (e, ei, last_bb->succs)
3752 if (e->dest != other_bb)
3753 break;
3754 if (e == NULL)
3755 break;
3756 if (!single_pred_p (e->dest))
3757 break;
3758 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3759 break;
3760 if (!no_side_effect_bb (e->dest))
3761 break;
3762 last_bb = e->dest;
3764 if (first_bb == last_bb)
3765 return cfg_cleanup_needed;
3766 /* Here basic blocks first_bb through last_bb's predecessor
3767 end with GIMPLE_COND, all of them have one of the edges to
3768 other_bb and another to another block in the range,
3769 all blocks except first_bb don't have side-effects and
3770 last_bb ends with either GIMPLE_COND, or cast satisfying
3771 final_range_test_p. */
3772 for (bb = last_bb; ; bb = single_pred (bb))
3774 enum tree_code code;
3775 tree lhs, rhs;
3776 inter_bb_range_test_entry bb_ent;
3778 bb_ent.op = NULL_TREE;
3779 bb_ent.first_idx = ops.length ();
3780 bb_ent.last_idx = bb_ent.first_idx;
3781 e = find_edge (bb, other_bb);
3782 stmt = last_stmt (bb);
3783 gimple_set_visited (stmt, true);
3784 if (gimple_code (stmt) != GIMPLE_COND)
3786 use_operand_p use_p;
3787 gimple *phi;
3788 edge e2;
3789 unsigned int d;
3791 lhs = gimple_assign_lhs (stmt);
3792 rhs = gimple_assign_rhs1 (stmt);
3793 gcc_assert (bb == last_bb);
3795 /* stmt is
3796 _123 = (int) _234;
3798 _234 = a_2(D) == 2;
3800 followed by:
3801 <bb M>:
3802 # _345 = PHI <_123(N), 1(...), 1(...)>
3804 or 0 instead of 1. If it is 0, the _234
3805 range test is anded together with all the
3806 other range tests, if it is 1, it is ored with
3807 them. */
3808 single_imm_use (lhs, &use_p, &phi);
3809 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3810 e2 = find_edge (first_bb, other_bb);
3811 d = e2->dest_idx;
3812 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3813 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3814 code = BIT_AND_EXPR;
3815 else
3817 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3818 code = BIT_IOR_EXPR;
3821 /* If _234 SSA_NAME_DEF_STMT is
3822 _234 = _567 | _789;
3823 (or &, corresponding to 1/0 in the phi arguments),
3824 push into ops the individual range test arguments
3825 of the bitwise or resp. and, recursively. */
3826 if (TREE_CODE (rhs) == SSA_NAME
3827 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3828 != tcc_comparison)
3829 && !get_ops (rhs, code, &ops,
3830 loop_containing_stmt (stmt))
3831 && has_single_use (rhs))
3833 /* Otherwise, push the _234 range test itself. */
3834 operand_entry *oe = operand_entry_pool.allocate ();
3836 oe->op = rhs;
3837 oe->rank = code;
3838 oe->id = 0;
3839 oe->count = 1;
3840 oe->stmt_to_insert = NULL;
3841 ops.safe_push (oe);
3842 bb_ent.last_idx++;
3843 bb_ent.op = rhs;
3845 else if (is_gimple_assign (stmt)
3846 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3847 == tcc_comparison)
3848 && !get_ops (lhs, code, &ops,
3849 loop_containing_stmt (stmt))
3850 && has_single_use (lhs))
3852 operand_entry *oe = operand_entry_pool.allocate ();
3853 oe->op = lhs;
3854 oe->rank = code;
3855 oe->id = 0;
3856 oe->count = 1;
3857 ops.safe_push (oe);
3858 bb_ent.last_idx++;
3859 bb_ent.op = lhs;
3861 else
3863 bb_ent.last_idx = ops.length ();
3864 bb_ent.op = rhs;
3866 bbinfo.safe_push (bb_ent);
3867 continue;
3869 /* Otherwise stmt is GIMPLE_COND. */
3870 code = gimple_cond_code (stmt);
3871 lhs = gimple_cond_lhs (stmt);
3872 rhs = gimple_cond_rhs (stmt);
3873 if (TREE_CODE (lhs) == SSA_NAME
3874 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3875 && ((code != EQ_EXPR && code != NE_EXPR)
3876 || rhs != boolean_false_node
3877 /* Either push into ops the individual bitwise
3878 or resp. and operands, depending on which
3879 edge is other_bb. */
3880 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3881 ^ (code == EQ_EXPR))
3882 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3883 loop_containing_stmt (stmt))))
3885 /* Or push the GIMPLE_COND stmt itself. */
3886 operand_entry *oe = operand_entry_pool.allocate ();
3888 oe->op = NULL;
3889 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3890 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3891 /* oe->op = NULL signals that there is no SSA_NAME
3892 for the range test, and oe->id instead is the
3893 basic block number at whose end the GIMPLE_COND
3894 is found. */
3895 oe->id = bb->index;
3896 oe->count = 1;
3897 oe->stmt_to_insert = NULL;
3898 ops.safe_push (oe);
3899 bb_ent.op = NULL;
3900 bb_ent.last_idx++;
3902 else if (ops.length () > bb_ent.first_idx)
3904 bb_ent.op = lhs;
3905 bb_ent.last_idx = ops.length ();
3907 bbinfo.safe_push (bb_ent);
3908 if (bb == first_bb)
3909 break;
3911 if (ops.length () > 1)
3912 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3913 if (any_changes)
3915 unsigned int idx, max_idx = 0;
3916 /* update_ops relies on has_single_use predicates returning the
3917 same values as they did during get_ops earlier. Additionally it
3918 never removes statements, only adds new ones and it should walk
3919 from the single imm use and check the predicate already before
3920 making those changes.
3921 On the other side, the handling of GIMPLE_COND directly can turn
3922 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3923 it needs to be done in a separate loop afterwards. */
3924 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3926 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3927 && bbinfo[idx].op != NULL_TREE)
3929 tree new_op;
3931 max_idx = idx;
3932 stmt = last_stmt (bb);
3933 new_op = update_ops (bbinfo[idx].op,
3934 (enum tree_code)
3935 ops[bbinfo[idx].first_idx]->rank,
3936 ops, &bbinfo[idx].first_idx,
3937 loop_containing_stmt (stmt));
3938 if (new_op == NULL_TREE)
3940 gcc_assert (bb == last_bb);
3941 new_op = ops[bbinfo[idx].first_idx++]->op;
3943 if (bbinfo[idx].op != new_op)
3945 imm_use_iterator iter;
3946 use_operand_p use_p;
3947 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3949 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3950 if (is_gimple_debug (use_stmt))
3951 continue;
3952 else if (gimple_code (use_stmt) == GIMPLE_COND
3953 || gimple_code (use_stmt) == GIMPLE_PHI)
3954 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3955 SET_USE (use_p, new_op);
3956 else if ((is_gimple_assign (use_stmt)
3957 && (TREE_CODE_CLASS
3958 (gimple_assign_rhs_code (use_stmt))
3959 == tcc_comparison)))
3960 cast_or_tcc_cmp_stmt = use_stmt;
3961 else if (gimple_assign_cast_p (use_stmt))
3962 cast_or_tcc_cmp_stmt = use_stmt;
3963 else
3964 gcc_unreachable ();
3966 if (cast_or_tcc_cmp_stmt)
3968 gcc_assert (bb == last_bb);
3969 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3970 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3971 enum tree_code rhs_code
3972 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3973 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3974 : CONVERT_EXPR;
3975 gassign *g;
3976 if (is_gimple_min_invariant (new_op))
3978 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3979 g = gimple_build_assign (new_lhs, new_op);
3981 else
3982 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3983 gimple_stmt_iterator gsi
3984 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3985 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3986 gimple_set_visited (g, true);
3987 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3988 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3989 if (is_gimple_debug (use_stmt))
3990 continue;
3991 else if (gimple_code (use_stmt) == GIMPLE_COND
3992 || gimple_code (use_stmt) == GIMPLE_PHI)
3993 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3994 SET_USE (use_p, new_lhs);
3995 else
3996 gcc_unreachable ();
4000 if (bb == first_bb)
4001 break;
4003 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4005 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4006 && bbinfo[idx].op == NULL_TREE
4007 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4009 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4011 if (idx > max_idx)
4012 max_idx = idx;
4014 /* If we collapse the conditional to a true/false
4015 condition, then bubble that knowledge up to our caller. */
4016 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4018 gimple_cond_make_false (cond_stmt);
4019 cfg_cleanup_needed = true;
4021 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4023 gimple_cond_make_true (cond_stmt);
4024 cfg_cleanup_needed = true;
4026 else
4028 gimple_cond_set_code (cond_stmt, NE_EXPR);
4029 gimple_cond_set_lhs (cond_stmt,
4030 ops[bbinfo[idx].first_idx]->op);
4031 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4033 update_stmt (cond_stmt);
4035 if (bb == first_bb)
4036 break;
4039 /* The above changes could result in basic blocks after the first
4040 modified one, up to and including last_bb, being executed even if
4041 they would not be in the original program. If the value ranges of
4042 assignment lhs' in those bbs were dependent on the conditions
4043 guarding those basic blocks, which can now change, the VRs might
4044 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4045 are only used within the same bb, it should not be a big deal if
4046 we just reset all the VRs in those bbs. See PR68671. */
4047 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4048 reset_flow_sensitive_info_in_bb (bb);
4050 return cfg_cleanup_needed;
4053 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4054 of STMT in its operands. This is also known as a "destructive
4055 update" operation. */
4057 static bool
4058 is_phi_for_stmt (gimple *stmt, tree operand)
4060 gimple *def_stmt;
4061 gphi *def_phi;
4062 tree lhs;
4063 use_operand_p arg_p;
4064 ssa_op_iter i;
4066 if (TREE_CODE (operand) != SSA_NAME)
4067 return false;
4069 lhs = gimple_assign_lhs (stmt);
4071 def_stmt = SSA_NAME_DEF_STMT (operand);
4072 def_phi = dyn_cast <gphi *> (def_stmt);
4073 if (!def_phi)
4074 return false;
4076 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4077 if (lhs == USE_FROM_PTR (arg_p))
4078 return true;
4079 return false;
4082 /* Remove def stmt of VAR if VAR has zero uses and recurse
4083 on rhs1 operand if so. */
4085 static void
4086 remove_visited_stmt_chain (tree var)
4088 gimple *stmt;
4089 gimple_stmt_iterator gsi;
4091 while (1)
4093 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4094 return;
4095 stmt = SSA_NAME_DEF_STMT (var);
4096 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4098 var = gimple_assign_rhs1 (stmt);
4099 gsi = gsi_for_stmt (stmt);
4100 reassoc_remove_stmt (&gsi);
4101 release_defs (stmt);
4103 else
4104 return;
4108 /* This function checks three consecutive operands in the
4109 passed operands vector OPS starting from OPINDEX and
4110 swaps two operands if it is profitable for the binary operation
4111 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4113 We pair ops with the same rank if possible.
4115 The alternative we try is to see if STMT is a destructive
4116 update style statement, which is like:
4117 b = phi (a, ...)
4118 a = c + b;
4119 In that case, we want to use the destructive update form to
4120 expose the possible vectorizer sum reduction opportunity.
4121 In that case, the third operand will be the phi node. This
4122 check is not performed if STMT is null.
4124 We could, of course, try to be better as noted above, and do a
4125 lot of work to try to find these opportunities in >3 operand
4126 cases, but it is unlikely to be worth it. */
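/* For instance, if the three operands at OPINDEX have ranks (4, 4, 7),
   the first and third entries are swapped so that the binary operation
   consuming the OPINDEX + 1 and OPINDEX + 2 operands pairs the two
   rank-4 operands.  (Ranks chosen purely for illustration.)  */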
4128 static void
4129 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4130 unsigned int opindex, gimple *stmt)
4132 operand_entry *oe1, *oe2, *oe3;
4134 oe1 = ops[opindex];
4135 oe2 = ops[opindex + 1];
4136 oe3 = ops[opindex + 2];
4138 if ((oe1->rank == oe2->rank
4139 && oe2->rank != oe3->rank)
4140 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4141 && !is_phi_for_stmt (stmt, oe1->op)
4142 && !is_phi_for_stmt (stmt, oe2->op)))
4143 std::swap (*oe1, *oe3);
4144 else if ((oe1->rank == oe3->rank
4145 && oe2->rank != oe3->rank)
4146 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4147 && !is_phi_for_stmt (stmt, oe1->op)
4148 && !is_phi_for_stmt (stmt, oe3->op)))
4149 std::swap (*oe1, *oe2);
4152 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4153 two definitions, otherwise return STMT. */
4155 static inline gimple *
4156 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4158 if (TREE_CODE (rhs1) == SSA_NAME
4159 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4160 stmt = SSA_NAME_DEF_STMT (rhs1);
4161 if (TREE_CODE (rhs2) == SSA_NAME
4162 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4163 stmt = SSA_NAME_DEF_STMT (rhs2);
4164 return stmt;
4167 /* If the stmt that defines operand has to be inserted, insert it
4168 before the use. */
4169 static void
4170 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4172 gcc_assert (is_gimple_assign (stmt_to_insert));
4173 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4174 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4175 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4176 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4177 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4179 /* If the insert point is not stmt, then insert_point would be
4180 the point where operand rhs1 or rhs2 is defined. In this case,
4181 stmt_to_insert has to be inserted afterwards. This would
4182 only happen when the stmt insertion point is flexible. */
4183 if (stmt == insert_point)
4184 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4185 else
4186 insert_stmt_after (stmt_to_insert, insert_point);
4190 /* Recursively rewrite our linearized statements so that the operands
4191 match those in OPS[OPINDEX], putting the computation in rank
4192 order. Return new lhs. */
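/* As a rough illustration: with a left-linear chain of three additions
   and a sorted operand vector { a, b, c, d }, the recursion below
   rewrites the innermost statement to c + d and each outer statement to
   <inner result> plus the next earlier operand in the vector, yielding
   ((c + d) + b) + a.  (Operand names are illustrative only.)  */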
4194 static tree
4195 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4196 vec<operand_entry *> ops, bool changed)
4198 tree rhs1 = gimple_assign_rhs1 (stmt);
4199 tree rhs2 = gimple_assign_rhs2 (stmt);
4200 tree lhs = gimple_assign_lhs (stmt);
4201 operand_entry *oe;
4203 /* The final recursion case for this function is that you have
4204 exactly two operations left.
4205 If we had exactly one op in the entire list to start with, we
4206 would have never called this function, and the tail recursion
4207 rewrites them one at a time. */
4208 if (opindex + 2 == ops.length ())
4210 operand_entry *oe1, *oe2;
4212 oe1 = ops[opindex];
4213 oe2 = ops[opindex + 1];
4215 if (rhs1 != oe1->op || rhs2 != oe2->op)
4217 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4218 unsigned int uid = gimple_uid (stmt);
4220 if (dump_file && (dump_flags & TDF_DETAILS))
4222 fprintf (dump_file, "Transforming ");
4223 print_gimple_stmt (dump_file, stmt, 0, 0);
4226 /* If the stmt that defines operand has to be inserted, insert it
4227 before the use. */
4228 if (oe1->stmt_to_insert)
4229 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4230 if (oe2->stmt_to_insert)
4231 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4232 /* Even when changed is false, reassociation could have e.g. removed
4233 some redundant operations, so unless we are just swapping the
4234 arguments or unless there is no change at all (then we just
4235 return lhs), force creation of a new SSA_NAME. */
4236 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4238 gimple *insert_point
4239 = find_insert_point (stmt, oe1->op, oe2->op);
4240 lhs = make_ssa_name (TREE_TYPE (lhs));
4241 stmt
4242 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4243 oe1->op, oe2->op);
4244 gimple_set_uid (stmt, uid);
4245 gimple_set_visited (stmt, true);
4246 if (insert_point == gsi_stmt (gsi))
4247 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4248 else
4249 insert_stmt_after (stmt, insert_point);
4251 else
4253 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4254 == stmt);
4255 gimple_assign_set_rhs1 (stmt, oe1->op);
4256 gimple_assign_set_rhs2 (stmt, oe2->op);
4257 update_stmt (stmt);
4260 if (rhs1 != oe1->op && rhs1 != oe2->op)
4261 remove_visited_stmt_chain (rhs1);
4263 if (dump_file && (dump_flags & TDF_DETAILS))
4265 fprintf (dump_file, " into ");
4266 print_gimple_stmt (dump_file, stmt, 0, 0);
4269 return lhs;
4272 /* If we hit here, we should have 3 or more ops left. */
4273 gcc_assert (opindex + 2 < ops.length ());
4275 /* Rewrite the next operator. */
4276 oe = ops[opindex];
4278 /* If the stmt that defines operand has to be inserted, insert it
4279 before the use. */
4280 if (oe->stmt_to_insert)
4281 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4283 /* Recurse on the LHS of the binary operator, which is guaranteed to
4284 be the non-leaf side. */
4285 tree new_rhs1
4286 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4287 changed || oe->op != rhs2);
4289 if (oe->op != rhs2 || new_rhs1 != rhs1)
4291 if (dump_file && (dump_flags & TDF_DETAILS))
4293 fprintf (dump_file, "Transforming ");
4294 print_gimple_stmt (dump_file, stmt, 0, 0);
4297 /* If changed is false, this is either opindex == 0
4298 or all outer rhs2's were equal to corresponding oe->op,
4299 and powi_result is NULL.
4300 That means lhs is equivalent before and after reassociation.
4301 Otherwise ensure the old lhs SSA_NAME is not reused and
4302 create a new stmt as well, so that any debug stmts will be
4303 properly adjusted. */
4304 if (changed)
4306 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4307 unsigned int uid = gimple_uid (stmt);
4308 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4310 lhs = make_ssa_name (TREE_TYPE (lhs));
4311 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4312 new_rhs1, oe->op);
4313 gimple_set_uid (stmt, uid);
4314 gimple_set_visited (stmt, true);
4315 if (insert_point == gsi_stmt (gsi))
4316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4317 else
4318 insert_stmt_after (stmt, insert_point);
4320 else
4322 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4323 == stmt);
4324 gimple_assign_set_rhs1 (stmt, new_rhs1);
4325 gimple_assign_set_rhs2 (stmt, oe->op);
4326 update_stmt (stmt);
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, " into ");
4332 print_gimple_stmt (dump_file, stmt, 0, 0);
4335 return lhs;
4338 /* Find out how many cycles we need to compute the statement chain.
4339 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4340 maximum number of independent statements we may execute per cycle. */
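/* For example, with OPS_NUM = 10 and CPU_WIDTH = 2 the code below
   computes 10 / (2 * 2) = 2 full-width cycles, leaving 6 operands;
   since 6 is not a power of two, floor_log2 (6) + 1 = 3 further cycles
   are added, for a total of 5.  (Numbers chosen purely for
   illustration.)  */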
4342 static int
4343 get_required_cycles (int ops_num, int cpu_width)
4345 int res;
4346 int elog;
4347 unsigned int rest;
4349 /* While we have more than 2 * cpu_width operands
4350 we may reduce the number of operands by cpu_width
4351 per cycle. */
4352 res = ops_num / (2 * cpu_width);
4354 /* The remaining operand count may be halved each cycle
4355 until we have only one operand. */
4356 rest = (unsigned)(ops_num - res * cpu_width);
4357 elog = exact_log2 (rest);
4358 if (elog >= 0)
4359 res += elog;
4360 else
4361 res += floor_log2 (rest) + 1;
4363 return res;
4366 /* Returns an optimal number of registers to use for computation of
4367 given statements. */
4369 static int
4370 get_reassociation_width (int ops_num, enum tree_code opc,
4371 machine_mode mode)
4373 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4374 int width;
4375 int width_min;
4376 int cycles_best;
4378 if (param_width > 0)
4379 width = param_width;
4380 else
4381 width = targetm.sched.reassociation_width (opc, mode);
4383 if (width == 1)
4384 return width;
4386 /* Get the minimal time required for sequence computation. */
4387 cycles_best = get_required_cycles (ops_num, width);
4389 /* Check if we may use less width and still compute sequence for
4390 the same time. It will allow us to reduce registers usage.
4391 get_required_cycles is monotonically increasing with lower width
4392 so we can perform a binary search for the minimal width that still
4393 results in the optimal cycle count. */
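/* As an illustrative scenario: if the target width is 4 but the chain
   needs the same number of cycles at width 2, the binary search below
   lowers the width to 2 (and possibly further), reducing register
   pressure at no cost in cycles.  */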
4394 width_min = 1;
4395 while (width > width_min)
4397 int width_mid = (width + width_min) / 2;
4399 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4400 width = width_mid;
4401 else if (width_min < width_mid)
4402 width_min = width_mid;
4403 else
4404 break;
4407 return width;
4410 /* Rewrite our linearized statements so that the operands
4411 match those in OPS, putting the computation in rank
4412 order and trying to allow operations to be executed in
4413 parallel. */
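/* For instance, with four operands a, b, c, d and WIDTH 2 the intent is
   to emit something shaped like
     t1 = a + b;  t2 = c + d;  lhs = t1 + t2;
   so that t1 and t2 are independent, instead of the serial chain
   ((a + b) + c) + d.  (The exact operand pairing is illustrative.)  */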
4415 static void
4416 rewrite_expr_tree_parallel (gassign *stmt, int width,
4417 vec<operand_entry *> ops)
4419 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4420 int op_num = ops.length ();
4421 gcc_assert (op_num > 0);
4422 int stmt_num = op_num - 1;
4423 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4424 int op_index = op_num - 1;
4425 int stmt_index = 0;
4426 int ready_stmts_end = 0;
4427 int i = 0;
4428 gimple *stmt1 = NULL, *stmt2 = NULL;
4429 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4431 /* We start expression rewriting from the top statements.
4432 So, in this loop we create a full list of statements
4433 we will work with. */
4434 stmts[stmt_num - 1] = stmt;
4435 for (i = stmt_num - 2; i >= 0; i--)
4436 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4438 for (i = 0; i < stmt_num; i++)
4440 tree op1, op2;
4442 /* Determine whether we should use results of
4443 already handled statements or not. */
4444 if (ready_stmts_end == 0
4445 && (i - stmt_index >= width || op_index < 1))
4446 ready_stmts_end = i;
4448 /* Now we choose operands for the next statement. A nonzero
4449 value in ready_stmts_end here means that we should use
4450 the result of already generated statements as a new operand. */
4451 if (ready_stmts_end > 0)
4453 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4454 if (ready_stmts_end > stmt_index)
4455 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4456 else if (op_index >= 0)
4458 operand_entry *oe = ops[op_index--];
4459 stmt2 = oe->stmt_to_insert;
4460 op2 = oe->op;
4462 else
4464 gcc_assert (stmt_index < i);
4465 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4468 if (stmt_index >= ready_stmts_end)
4469 ready_stmts_end = 0;
4471 else
4473 if (op_index > 1)
4474 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4475 operand_entry *oe2 = ops[op_index--];
4476 operand_entry *oe1 = ops[op_index--];
4477 op2 = oe2->op;
4478 stmt2 = oe2->stmt_to_insert;
4479 op1 = oe1->op;
4480 stmt1 = oe1->stmt_to_insert;
4483 /* If we emit the last statement then we should put the
4484 operands into the last statement. This will also
4485 break the loop. */
4486 if (op_index < 0 && stmt_index == i)
4487 i = stmt_num - 1;
4489 if (dump_file && (dump_flags & TDF_DETAILS))
4491 fprintf (dump_file, "Transforming ");
4492 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4495 /* If the stmt that defines operand has to be inserted, insert it
4496 before the use. */
4497 if (stmt1)
4498 insert_stmt_before_use (stmts[i], stmt1);
4499 if (stmt2)
4500 insert_stmt_before_use (stmts[i], stmt2);
4501 stmt1 = stmt2 = NULL;
4503 /* We keep the original statement only for the last one. All
4504 others are recreated. */
4505 if (i == stmt_num - 1)
4507 gimple_assign_set_rhs1 (stmts[i], op1);
4508 gimple_assign_set_rhs2 (stmts[i], op2);
4509 update_stmt (stmts[i]);
4511 else
4513 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4515 if (dump_file && (dump_flags & TDF_DETAILS))
4517 fprintf (dump_file, " into ");
4518 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4522 remove_visited_stmt_chain (last_rhs1);
4525 /* Transform STMT, which is really (A + B) + (C + D), into the left
4526 linear form, ((A+B)+C)+D.
4527 Recurse on D if necessary. */
4529 static void
4530 linearize_expr (gimple *stmt)
4532 gimple_stmt_iterator gsi;
4533 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4534 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4535 gimple *oldbinrhs = binrhs;
4536 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4537 gimple *newbinrhs = NULL;
4538 struct loop *loop = loop_containing_stmt (stmt);
4539 tree lhs = gimple_assign_lhs (stmt);
4541 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4542 && is_reassociable_op (binrhs, rhscode, loop));
4544 gsi = gsi_for_stmt (stmt);
4546 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4547 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4548 gimple_assign_rhs_code (binrhs),
4549 gimple_assign_lhs (binlhs),
4550 gimple_assign_rhs2 (binrhs));
4551 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4552 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4553 gimple_set_uid (binrhs, gimple_uid (stmt));
4555 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4556 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4558 if (dump_file && (dump_flags & TDF_DETAILS))
4560 fprintf (dump_file, "Linearized: ");
4561 print_gimple_stmt (dump_file, stmt, 0, 0);
4564 reassociate_stats.linearized++;
4565 update_stmt (stmt);
4567 gsi = gsi_for_stmt (oldbinrhs);
4568 reassoc_remove_stmt (&gsi);
4569 release_defs (oldbinrhs);
4571 gimple_set_visited (stmt, true);
4572 gimple_set_visited (binlhs, true);
4573 gimple_set_visited (binrhs, true);
4575 /* Tail recurse on the new rhs if it still needs reassociation. */
4576 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4577 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4578 want to change the algorithm while converting to tuples. */
4579 linearize_expr (stmt);
4582 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4583 it. Otherwise, return NULL. */
4585 static gimple *
4586 get_single_immediate_use (tree lhs)
4588 use_operand_p immuse;
4589 gimple *immusestmt;
4591 if (TREE_CODE (lhs) == SSA_NAME
4592 && single_imm_use (lhs, &immuse, &immusestmt)
4593 && is_gimple_assign (immusestmt))
4594 return immusestmt;
4596 return NULL;
4599 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4600 representing the negated value. Insertions of any necessary
4601 instructions go before GSI.
4602 This function is recursive in that, if you hand it "a_5" as the
4603 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4604 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4606 static tree
4607 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4609 gimple *negatedefstmt = NULL;
4610 tree resultofnegate;
4611 gimple_stmt_iterator gsi;
4612 unsigned int uid;
4614 /* If we are trying to negate a name defined by an add, negate the
4615 add operands instead. */
4616 if (TREE_CODE (tonegate) == SSA_NAME)
4617 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4618 if (TREE_CODE (tonegate) == SSA_NAME
4619 && is_gimple_assign (negatedefstmt)
4620 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4621 && has_single_use (gimple_assign_lhs (negatedefstmt))
4622 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4624 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4625 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4626 tree lhs = gimple_assign_lhs (negatedefstmt);
4627 gimple *g;
4629 gsi = gsi_for_stmt (negatedefstmt);
4630 rhs1 = negate_value (rhs1, &gsi);
4632 gsi = gsi_for_stmt (negatedefstmt);
4633 rhs2 = negate_value (rhs2, &gsi);
4635 gsi = gsi_for_stmt (negatedefstmt);
4636 lhs = make_ssa_name (TREE_TYPE (lhs));
4637 gimple_set_visited (negatedefstmt, true);
4638 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4639 gimple_set_uid (g, gimple_uid (negatedefstmt));
4640 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4641 return lhs;
4644 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4645 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4646 NULL_TREE, true, GSI_SAME_STMT);
4647 gsi = *gsip;
4648 uid = gimple_uid (gsi_stmt (gsi));
4649 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4651 gimple *stmt = gsi_stmt (gsi);
4652 if (gimple_uid (stmt) != 0)
4653 break;
4654 gimple_set_uid (stmt, uid);
4656 return resultofnegate;
4659 /* Return true if we should break up the subtract in STMT into an add
4660 with negate. This is true when the subtract operands are really
4661 adds, or the subtract itself is used in an add expression. In
4662 either case, breaking up the subtract into an add with negate
4663 exposes the adds to reassociation. */
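/* For example, if c = a - b is used only in d = c + e, rewriting it as
   c = a + -b lets the whole computation be linearized and reassociated
   as the single addition chain a + -b + e.  */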
4665 static bool
4666 should_break_up_subtract (gimple *stmt)
4668 tree lhs = gimple_assign_lhs (stmt);
4669 tree binlhs = gimple_assign_rhs1 (stmt);
4670 tree binrhs = gimple_assign_rhs2 (stmt);
4671 gimple *immusestmt;
4672 struct loop *loop = loop_containing_stmt (stmt);
4674 if (TREE_CODE (binlhs) == SSA_NAME
4675 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4676 return true;
4678 if (TREE_CODE (binrhs) == SSA_NAME
4679 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4680 return true;
4682 if (TREE_CODE (lhs) == SSA_NAME
4683 && (immusestmt = get_single_immediate_use (lhs))
4684 && is_gimple_assign (immusestmt)
4685 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4686 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4687 return true;
4688 return false;
4691 /* Transform STMT from A - B into A + -B. */
4693 static void
4694 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4696 tree rhs1 = gimple_assign_rhs1 (stmt);
4697 tree rhs2 = gimple_assign_rhs2 (stmt);
4699 if (dump_file && (dump_flags & TDF_DETAILS))
4701 fprintf (dump_file, "Breaking up subtract ");
4702 print_gimple_stmt (dump_file, stmt, 0, 0);
4705 rhs2 = negate_value (rhs2, gsip);
4706 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4707 update_stmt (stmt);
4710 /* Determine whether STMT is a builtin call that raises an SSA name
4711 to an integer power and has only one use. If so, and this is early
4712 reassociation and unsafe math optimizations are permitted, place
4713 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4714 If any of these conditions does not hold, return FALSE. */
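/* For instance, a single-use call t = pow (x, 4.0) (with unsafe math
   optimizations enabled and errno-setting math disabled) yields
   *BASE = x and *EXPONENT = 4, whereas pow (x, 3.5) is rejected because
   the exponent is not an exact integer, and powi (x, 1) is likewise
   rejected since exponents below 2 are left to expression folding.  */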
4716 static bool
4717 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4719 tree arg1;
4720 REAL_VALUE_TYPE c, cint;
4722 switch (gimple_call_combined_fn (stmt))
4724 CASE_CFN_POW:
4725 if (flag_errno_math)
4726 return false;
4728 *base = gimple_call_arg (stmt, 0);
4729 arg1 = gimple_call_arg (stmt, 1);
4731 if (TREE_CODE (arg1) != REAL_CST)
4732 return false;
4734 c = TREE_REAL_CST (arg1);
4736 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4737 return false;
4739 *exponent = real_to_integer (&c);
4740 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4741 if (!real_identical (&c, &cint))
4742 return false;
4744 break;
4746 CASE_CFN_POWI:
4747 *base = gimple_call_arg (stmt, 0);
4748 arg1 = gimple_call_arg (stmt, 1);
4750 if (!tree_fits_shwi_p (arg1))
4751 return false;
4753 *exponent = tree_to_shwi (arg1);
4754 break;
4756 default:
4757 return false;
4760 /* Expanding negative exponents is generally unproductive, so we don't
4761 complicate matters with those. Exponents of zero and one should
4762 have been handled by expression folding. */
4763 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4764 return false;
4766 return true;
4769 /* Try to derive and add operand entry for OP to *OPS. Return false if
4770 unsuccessful. */
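/* Two shapes are recognized for multiplication chains: a single-use
   operand defined by an acceptable pow/powi call is recorded as a
   repeated factor, and a single-use operand defined as t = -x is added
   as the pair { x, -1 } (subject to the usual SNaN / signed-zero
   checks) so that the negation can later be folded into the chain.  */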
4772 static bool
4773 try_special_add_to_ops (vec<operand_entry *> *ops,
4774 enum tree_code code,
4775 tree op, gimple* def_stmt)
4777 tree base = NULL_TREE;
4778 HOST_WIDE_INT exponent = 0;
4780 if (TREE_CODE (op) != SSA_NAME
4781 || ! has_single_use (op))
4782 return false;
4784 if (code == MULT_EXPR
4785 && reassoc_insert_powi_p
4786 && flag_unsafe_math_optimizations
4787 && is_gimple_call (def_stmt)
4788 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4790 add_repeat_to_ops_vec (ops, base, exponent);
4791 gimple_set_visited (def_stmt, true);
4792 return true;
4794 else if (code == MULT_EXPR
4795 && is_gimple_assign (def_stmt)
4796 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4797 && !HONOR_SNANS (TREE_TYPE (op))
4798 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4799 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4801 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4802 tree cst = build_minus_one_cst (TREE_TYPE (op));
4803 add_to_ops_vec (ops, rhs1);
4804 add_to_ops_vec (ops, cst);
4805 gimple_set_visited (def_stmt, true);
4806 return true;
4809 return false;
4812 /* Recursively linearize a binary expression that is the RHS of STMT.
4813 Place the operands of the expression tree in the vector named OPS. */
4815 static void
4816 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4817 bool is_associative, bool set_visited)
4819 tree binlhs = gimple_assign_rhs1 (stmt);
4820 tree binrhs = gimple_assign_rhs2 (stmt);
4821 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4822 bool binlhsisreassoc = false;
4823 bool binrhsisreassoc = false;
4824 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4825 struct loop *loop = loop_containing_stmt (stmt);
4827 if (set_visited)
4828 gimple_set_visited (stmt, true);
4830 if (TREE_CODE (binlhs) == SSA_NAME)
4832 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4833 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4834 && !stmt_could_throw_p (binlhsdef));
4837 if (TREE_CODE (binrhs) == SSA_NAME)
4839 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4840 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4841 && !stmt_could_throw_p (binrhsdef));
4844 /* If the LHS is not reassociable, but the RHS is, we need to swap
4845 them. If neither is reassociable, there is nothing we can do, so
4846 just put them in the ops vector. If the LHS is reassociable,
4847 linearize it. If both are reassociable, then linearize the RHS
4848 and the LHS. */
4850 if (!binlhsisreassoc)
4852 /* If this is not an associative operation like division, give up. */
4853 if (!is_associative)
4855 add_to_ops_vec (ops, binrhs);
4856 return;
4859 if (!binrhsisreassoc)
4861 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4862 add_to_ops_vec (ops, binrhs);
4864 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4865 add_to_ops_vec (ops, binlhs);
4867 return;
4870 if (dump_file && (dump_flags & TDF_DETAILS))
4872 fprintf (dump_file, "swapping operands of ");
4873 print_gimple_stmt (dump_file, stmt, 0, 0);
4876 swap_ssa_operands (stmt,
4877 gimple_assign_rhs1_ptr (stmt),
4878 gimple_assign_rhs2_ptr (stmt));
4879 update_stmt (stmt);
4881 if (dump_file && (dump_flags & TDF_DETAILS))
4883 fprintf (dump_file, " is now ");
4884 print_gimple_stmt (dump_file, stmt, 0, 0);
4887 /* We want to make it so the lhs is always the reassociative op,
4888 so swap. */
4889 std::swap (binlhs, binrhs);
4891 else if (binrhsisreassoc)
4893 linearize_expr (stmt);
4894 binlhs = gimple_assign_rhs1 (stmt);
4895 binrhs = gimple_assign_rhs2 (stmt);
4898 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4899 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4900 rhscode, loop));
4901 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4902 is_associative, set_visited);
4904 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4905 add_to_ops_vec (ops, binrhs);
4908 /* Repropagate the negates back into subtracts, since no other pass
4909 currently does it. */
4911 static void
4912 repropagate_negates (void)
4914 unsigned int i = 0;
4915 tree negate;
4917 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4919 gimple *user = get_single_immediate_use (negate);
4921 if (!user || !is_gimple_assign (user))
4922 continue;
4924 /* The negate operand can be either operand of a PLUS_EXPR
4925 (it can be the LHS if the RHS is a constant for example).
4927 Force the negate operand to the RHS of the PLUS_EXPR, then
4928 transform the PLUS_EXPR into a MINUS_EXPR. */
4929 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4931 /* If the negated operand appears on the LHS of the
4932 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4933 to force the negated operand to the RHS of the PLUS_EXPR. */
4934 if (gimple_assign_rhs1 (user) == negate)
4936 swap_ssa_operands (user,
4937 gimple_assign_rhs1_ptr (user),
4938 gimple_assign_rhs2_ptr (user));
4941 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4942 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4943 if (gimple_assign_rhs2 (user) == negate)
4945 tree rhs1 = gimple_assign_rhs1 (user);
4946 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4947 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4948 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4949 update_stmt (user);
4952 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4954 if (gimple_assign_rhs1 (user) == negate)
4956 /* We have
4957 x = -a
4958 y = x - b
4959 which we transform into
4960 x = a + b
4961 y = -x .
4962 This pushes down the negate which we possibly can merge
4963 into some other operation, hence insert it into the
4964 plus_negates vector. */
4965 gimple *feed = SSA_NAME_DEF_STMT (negate);
4966 tree a = gimple_assign_rhs1 (feed);
4967 tree b = gimple_assign_rhs2 (user);
4968 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4969 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4970 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4971 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4972 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4973 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4974 user = gsi_stmt (gsi2);
4975 update_stmt (user);
4976 reassoc_remove_stmt (&gsi);
4977 release_defs (feed);
4978 plus_negates.safe_push (gimple_assign_lhs (user));
4980 else
4982 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4983 rid of one operation. */
4984 gimple *feed = SSA_NAME_DEF_STMT (negate);
4985 tree a = gimple_assign_rhs1 (feed);
4986 tree rhs1 = gimple_assign_rhs1 (user);
4987 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4988 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4989 update_stmt (gsi_stmt (gsi));
4995 /* Returns true if OP is of a type for which we can do reassociation.
4996 That is for integral or non-saturating fixed-point types, and for
4997 floating-point types when associative-math is enabled. */
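/* For example, additions on wrapping (e.g. unsigned) integer types
   qualify, while signed types with undefined overflow, saturating
   fixed-point types, and floating-point types without
   -fassociative-math do not; SSA names occurring in abnormal PHIs are
   always rejected.  */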
4999 static bool
5000 can_reassociate_p (tree op)
5002 tree type = TREE_TYPE (op);
5003 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5004 return false;
5005 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5006 || NON_SAT_FIXED_POINT_TYPE_P (type)
5007 || (flag_associative_math && FLOAT_TYPE_P (type)))
5008 return true;
5009 return false;
5012 /* Break up subtract operations in block BB.
5014 We do this top down because we don't know whether the subtract is
5015 part of a possible chain of reassociation except at the top.
5017 IE given
5018 d = f + g
5019 c = a + e
5020 b = c - d
5021 q = b - r
5022 k = t - q
5024 we want to break up k = t - q, but we won't until we've transformed q
5025 = b - r, which won't be broken up until we transform b = c - d.
5027 En passant, clear the GIMPLE visited flag on every statement
5028 and set UIDs within each basic block. */
5030 static void
5031 break_up_subtract_bb (basic_block bb)
5033 gimple_stmt_iterator gsi;
5034 basic_block son;
5035 unsigned int uid = 1;
5037 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5039 gimple *stmt = gsi_stmt (gsi);
5040 gimple_set_visited (stmt, false);
5041 gimple_set_uid (stmt, uid++);
5043 if (!is_gimple_assign (stmt)
5044 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5045 continue;
5047 /* Look for simple gimple subtract operations. */
5048 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5050 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5051 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5052 continue;
5054 /* Check for a subtract used only in an addition. If this
5055 is the case, transform it into add of a negate for better
5056 reassociation. IE transform C = A-B into C = A + -B if C
5057 is only used in an addition. */
5058 if (should_break_up_subtract (stmt))
5059 break_up_subtract (stmt, &gsi);
5061 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5062 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5063 plus_negates.safe_push (gimple_assign_lhs (stmt));
5065 for (son = first_dom_son (CDI_DOMINATORS, bb);
5066 son;
5067 son = next_dom_son (CDI_DOMINATORS, son))
5068 break_up_subtract_bb (son);
5071 /* Used for repeated factor analysis. */
5072 struct repeat_factor
5074 /* An SSA name that occurs in a multiply chain. */
5075 tree factor;
5077 /* Cached rank of the factor. */
5078 unsigned rank;
5080 /* Number of occurrences of the factor in the chain. */
5081 HOST_WIDE_INT count;
5083 /* An SSA name representing the product of this factor and
5084 all factors appearing later in the repeated factor vector. */
5085 tree repr;
5089 static vec<repeat_factor> repeat_factor_vec;
5091 /* Used for sorting the repeat factor vector. Sort primarily by
5092 ascending occurrence count, secondarily by descending rank. */
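/* For example, factors { x: count 2, rank 5 }, { y: count 3, rank 4 }
   and { z: count 3, rank 7 } sort as x, z, y: x comes first because of
   its lower count, and z precedes y because, with equal counts, the
   higher rank sorts earlier.  */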
5094 static int
5095 compare_repeat_factors (const void *x1, const void *x2)
5097 const repeat_factor *rf1 = (const repeat_factor *) x1;
5098 const repeat_factor *rf2 = (const repeat_factor *) x2;
5100 if (rf1->count != rf2->count)
5101 return rf1->count - rf2->count;
5103 return rf2->rank - rf1->rank;
5106 /* Look for repeated operands in OPS in the multiply tree rooted at
5107 STMT. Replace them with an optimal sequence of multiplies and powi
5108 builtin calls, and remove the used operands from OPS. Return an
5109 SSA name representing the value of the replacement sequence. */
5111 static tree
5112 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5114 unsigned i, j, vec_len;
5115 int ii;
5116 operand_entry *oe;
5117 repeat_factor *rf1, *rf2;
5118 repeat_factor rfnew;
5119 tree result = NULL_TREE;
5120 tree target_ssa, iter_result;
5121 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5122 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5123 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5124 gimple *mul_stmt, *pow_stmt;
5126 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5127 target. */
5128 if (!powi_fndecl)
5129 return NULL_TREE;
5131 /* Allocate the repeated factor vector. */
5132 repeat_factor_vec.create (10);
5134 /* Scan the OPS vector for all SSA names in the product and build
5135 up a vector of occurrence counts for each factor. */
5136 FOR_EACH_VEC_ELT (*ops, i, oe)
5138 if (TREE_CODE (oe->op) == SSA_NAME)
5140 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5142 if (rf1->factor == oe->op)
5144 rf1->count += oe->count;
5145 break;
5149 if (j >= repeat_factor_vec.length ())
5151 rfnew.factor = oe->op;
5152 rfnew.rank = oe->rank;
5153 rfnew.count = oe->count;
5154 rfnew.repr = NULL_TREE;
5155 repeat_factor_vec.safe_push (rfnew);
5160 /* Sort the repeated factor vector by (a) increasing occurrence count,
5161 and (b) decreasing rank. */
5162 repeat_factor_vec.qsort (compare_repeat_factors);
5164 /* It is generally best to combine as many base factors as possible
5165 into a product before applying __builtin_powi to the result.
5166 However, the sort order chosen for the repeated factor vector
5167 allows us to cache partial results for the product of the base
5168 factors for subsequent use. When we already have a cached partial
5169 result from a previous iteration, it is best to make use of it
5170 before looking for another __builtin_powi opportunity.
5172 As an example, consider x * x * y * y * y * z * z * z * z.
5173 We want to first compose the product x * y * z, raise it to the
5174 second power, then multiply this by y * z, and finally multiply
5175 by z. This can be done in 5 multiplies provided we cache y * z
5176 for use in both expressions:
5178 t1 = y * z
5179 t2 = t1 * x
5180 t3 = t2 * t2
5181 t4 = t1 * t3
5182 result = t4 * z
5184 If we instead ignored the cached y * z and first multiplied by
5185 the __builtin_powi opportunity z * z, we would get the inferior:
5187 t1 = y * z
5188 t2 = t1 * x
5189 t3 = t2 * t2
5190 t4 = z * z
5191 t5 = t3 * t4
5192 result = t5 * y */
5194 vec_len = repeat_factor_vec.length ();
5196 /* Repeatedly look for opportunities to create a builtin_powi call. */
5197 while (true)
5199 HOST_WIDE_INT power;
5201 /* First look for the largest cached product of factors from
5202 preceding iterations. If found, create a builtin_powi for
5203 it if the minimum occurrence count for its factors is at
5204 least 2, or just use this cached product as our next
5205 multiplicand if the minimum occurrence count is 1. */
5206 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5208 if (rf1->repr && rf1->count > 0)
5209 break;
5212 if (j < vec_len)
5214 power = rf1->count;
5216 if (power == 1)
5218 iter_result = rf1->repr;
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 unsigned elt;
5223 repeat_factor *rf;
5224 fputs ("Multiplying by cached product ", dump_file);
5225 for (elt = j; elt < vec_len; elt++)
5227 rf = &repeat_factor_vec[elt];
5228 print_generic_expr (dump_file, rf->factor, 0);
5229 if (elt < vec_len - 1)
5230 fputs (" * ", dump_file);
5232 fputs ("\n", dump_file);
5235 else
5237 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5238 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5239 build_int_cst (integer_type_node,
5240 power));
5241 gimple_call_set_lhs (pow_stmt, iter_result);
5242 gimple_set_location (pow_stmt, gimple_location (stmt));
5243 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5244 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5246 if (dump_file && (dump_flags & TDF_DETAILS))
5248 unsigned elt;
5249 repeat_factor *rf;
5250 fputs ("Building __builtin_pow call for cached product (",
5251 dump_file);
5252 for (elt = j; elt < vec_len; elt++)
5254 rf = &repeat_factor_vec[elt];
5255 print_generic_expr (dump_file, rf->factor, 0);
5256 if (elt < vec_len - 1)
5257 fputs (" * ", dump_file);
5259 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5260 power);
5264 else
5266 /* Otherwise, find the first factor in the repeated factor
5267 vector whose occurrence count is at least 2. If no such
5268 factor exists, there are no builtin_powi opportunities
5269 remaining. */
5270 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5272 if (rf1->count >= 2)
5273 break;
5276 if (j >= vec_len)
5277 break;
5279 power = rf1->count;
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5283 unsigned elt;
5284 repeat_factor *rf;
5285 fputs ("Building __builtin_pow call for (", dump_file);
5286 for (elt = j; elt < vec_len; elt++)
5288 rf = &repeat_factor_vec[elt];
5289 print_generic_expr (dump_file, rf->factor, 0);
5290 if (elt < vec_len - 1)
5291 fputs (" * ", dump_file);
5293 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5296 reassociate_stats.pows_created++;
5298 /* Visit each element of the vector in reverse order (so that
5299 high-occurrence elements are visited first, and within the
5300 same occurrence count, lower-ranked elements are visited
5301 first). Form a linear product of all elements in this order
5302 whose occurrence count is at least that of element J.
5303 Record the SSA name representing the product of each element
5304 with all subsequent elements in the vector. */
5305 if (j == vec_len - 1)
5306 rf1->repr = rf1->factor;
5307 else
5309 for (ii = vec_len - 2; ii >= (int)j; ii--)
5311 tree op1, op2;
5313 rf1 = &repeat_factor_vec[ii];
5314 rf2 = &repeat_factor_vec[ii + 1];
5316 /* Init the last factor's representative to be itself. */
5317 if (!rf2->repr)
5318 rf2->repr = rf2->factor;
5320 op1 = rf1->factor;
5321 op2 = rf2->repr;
5323 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5324 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5325 op1, op2);
5326 gimple_set_location (mul_stmt, gimple_location (stmt));
5327 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5328 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5329 rf1->repr = target_ssa;
5331 /* Don't reprocess the multiply we just introduced. */
5332 gimple_set_visited (mul_stmt, true);
5336 /* Form a call to __builtin_powi for the maximum product
5337 just formed, raised to the power obtained earlier. */
5338 rf1 = &repeat_factor_vec[j];
5339 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5340 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5341 build_int_cst (integer_type_node,
5342 power));
5343 gimple_call_set_lhs (pow_stmt, iter_result);
5344 gimple_set_location (pow_stmt, gimple_location (stmt));
5345 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5346 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5349 /* If we previously formed at least one other builtin_powi call,
5350 form the product of this one and those others. */
5351 if (result)
5353 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5354 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5355 result, iter_result);
5356 gimple_set_location (mul_stmt, gimple_location (stmt));
5357 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5358 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5359 gimple_set_visited (mul_stmt, true);
5360 result = new_result;
5362 else
5363 result = iter_result;
5365 /* Decrement the occurrence count of each element in the product
5366 by the count found above, and remove this many copies of each
5367 factor from OPS. */
5368 for (i = j; i < vec_len; i++)
5370 unsigned k = power;
5371 unsigned n;
5373 rf1 = &repeat_factor_vec[i];
5374 rf1->count -= power;
5376 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5378 if (oe->op == rf1->factor)
5380 if (oe->count <= k)
5382 ops->ordered_remove (n);
5383 k -= oe->count;
5385 if (k == 0)
5386 break;
5388 else
5390 oe->count -= k;
5391 break;
5398 /* At this point all elements in the repeated factor vector have a
5399 remaining occurrence count of 0 or 1, and those with a count of 1
5400 don't have cached representatives. Re-sort the ops vector and
5401 clean up. */
5402 ops->qsort (sort_by_operand_rank);
5403 repeat_factor_vec.release ();
5405 /* Return the final product computed herein. Note that there may
5406 still be some elements with single occurrence count left in OPS;
5407 those will be handled by the normal reassociation logic. */
5408 return result;
5411 /* Attempt to optimize
5412 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5413 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
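/* E.g. 2.0 * copysign (3.0, y) in a multiplication chain becomes
   copysign (6.0, y); with a negative multiplier such as -2.0 the
   rewritten copysign result is additionally negated.  */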
5415 static void
5416 attempt_builtin_copysign (vec<operand_entry *> *ops)
5418 operand_entry *oe;
5419 unsigned int i;
5420 unsigned int length = ops->length ();
5421 tree cst = ops->last ()->op;
5423 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5424 return;
5426 FOR_EACH_VEC_ELT (*ops, i, oe)
5428 if (TREE_CODE (oe->op) == SSA_NAME
5429 && has_single_use (oe->op))
5431 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5432 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5434 tree arg0, arg1;
5435 switch (gimple_call_combined_fn (old_call))
5437 CASE_CFN_COPYSIGN:
5438 arg0 = gimple_call_arg (old_call, 0);
5439 arg1 = gimple_call_arg (old_call, 1);
5440 /* The first argument of copysign must be a constant,
5441 otherwise there's nothing to do. */
5442 if (TREE_CODE (arg0) == REAL_CST)
5444 tree type = TREE_TYPE (arg0);
5445 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5446 /* If we couldn't fold to a single constant, skip it.
5447 That happens e.g. for inexact multiplication when
5448 -frounding-math. */
5449 if (mul == NULL_TREE)
5450 break;
5451 /* Instead of adjusting OLD_CALL, let's build a new
5452 call to not leak the LHS and prevent keeping bogus
5453 debug statements. DCE will clean up the old call. */
5454 gcall *new_call;
5455 if (gimple_call_internal_p (old_call))
5456 new_call = gimple_build_call_internal
5457 (IFN_COPYSIGN, 2, mul, arg1);
5458 else
5459 new_call = gimple_build_call
5460 (gimple_call_fndecl (old_call), 2, mul, arg1);
5461 tree lhs = make_ssa_name (type);
5462 gimple_call_set_lhs (new_call, lhs);
5463 gimple_set_location (new_call,
5464 gimple_location (old_call));
5465 insert_stmt_after (new_call, old_call);
5466 /* We've used the constant, get rid of it. */
5467 ops->pop ();
5468 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5469 /* Handle the CST1 < 0 case by negating the result. */
5470 if (cst1_neg)
5472 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5473 gimple *negate_stmt
5474 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5475 insert_stmt_after (negate_stmt, new_call);
5476 oe->op = negrhs;
5478 else
5479 oe->op = lhs;
5480 if (dump_file && (dump_flags & TDF_DETAILS))
5482 fprintf (dump_file, "Optimizing copysign: ");
5483 print_generic_expr (dump_file, cst, 0);
5484 fprintf (dump_file, " * COPYSIGN (");
5485 print_generic_expr (dump_file, arg0, 0);
5486 fprintf (dump_file, ", ");
5487 print_generic_expr (dump_file, arg1, 0);
5488 fprintf (dump_file, ") into %sCOPYSIGN (",
5489 cst1_neg ? "-" : "");
5490 print_generic_expr (dump_file, mul, 0);
5491 fprintf (dump_file, ", ");
5492 print_generic_expr (dump_file, arg1, 0);
5493 fprintf (dump_file, "\n");
5495 return;
5497 break;
5498 default:
5499 break;
5506 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5508 static void
5509 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5511 tree rhs1;
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5515 fprintf (dump_file, "Transforming ");
5516 print_gimple_stmt (dump_file, stmt, 0, 0);
5519 rhs1 = gimple_assign_rhs1 (stmt);
5520 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5521 update_stmt (stmt);
5522 remove_visited_stmt_chain (rhs1);
5524 if (dump_file && (dump_flags & TDF_DETAILS))
5526 fprintf (dump_file, " into ");
5527 print_gimple_stmt (dump_file, stmt, 0, 0);
5531 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5533 static void
5534 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5535 tree rhs1, tree rhs2)
5537 if (dump_file && (dump_flags & TDF_DETAILS))
5539 fprintf (dump_file, "Transforming ");
5540 print_gimple_stmt (dump_file, stmt, 0, 0);
5543 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5544 update_stmt (gsi_stmt (*gsi));
5545 remove_visited_stmt_chain (rhs1);
5547 if (dump_file && (dump_flags & TDF_DETAILS))
5549 fprintf (dump_file, " into ");
5550 print_gimple_stmt (dump_file, stmt, 0, 0);
5554 /* Reassociate expressions in basic block BB and its children in the
5555 post-dominator tree.
5557 Bubble up return status from maybe_optimize_range_tests. */
5559 static bool
5560 reassociate_bb (basic_block bb)
5562 gimple_stmt_iterator gsi;
5563 basic_block son;
5564 gimple *stmt = last_stmt (bb);
5565 bool cfg_cleanup_needed = false;
5567 if (stmt && !gimple_visited_p (stmt))
5568 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5570 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5572 stmt = gsi_stmt (gsi);
5574 if (is_gimple_assign (stmt)
5575 && !stmt_could_throw_p (stmt))
5577 tree lhs, rhs1, rhs2;
5578 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5580 /* If this is not a gimple binary expression, there is
5581 nothing for us to do with it. */
5582 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5583 continue;
5585 /* If this was part of an already processed statement,
5586 we don't need to touch it again. */
5587 if (gimple_visited_p (stmt))
5589 /* This statement might have become dead because of previous
5590 reassociations. */
5591 if (has_zero_uses (gimple_get_lhs (stmt)))
5593 reassoc_remove_stmt (&gsi);
5594 release_defs (stmt);
5595 /* We might end up removing the last stmt above, which
5596 places the iterator at the end of the sequence.
5597 Reset it to the last stmt in this case, which might
5598 be the end of the sequence as well if we removed
5599 the last statement of the sequence, in which case
5600 we need to bail out. */
5601 if (gsi_end_p (gsi))
5603 gsi = gsi_last_bb (bb);
5604 if (gsi_end_p (gsi))
5605 break;
5608 continue;
5611 lhs = gimple_assign_lhs (stmt);
5612 rhs1 = gimple_assign_rhs1 (stmt);
5613 rhs2 = gimple_assign_rhs2 (stmt);
5615 /* For non-bit or min/max operations we can't associate
5616 all types. Verify that here. */
5617 if (rhs_code != BIT_IOR_EXPR
5618 && rhs_code != BIT_AND_EXPR
5619 && rhs_code != BIT_XOR_EXPR
5620 && rhs_code != MIN_EXPR
5621 && rhs_code != MAX_EXPR
5622 && (!can_reassociate_p (lhs)
5623 || !can_reassociate_p (rhs1)
5624 || !can_reassociate_p (rhs2)))
5625 continue;
5627 if (associative_tree_code (rhs_code))
5629 auto_vec<operand_entry *> ops;
5630 tree powi_result = NULL_TREE;
5631 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5633 /* There may be no immediate uses left by the time we
5634 get here because we may have eliminated them all. */
5635 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5636 continue;
5638 gimple_set_visited (stmt, true);
5639 linearize_expr_tree (&ops, stmt, true, true);
5640 ops.qsort (sort_by_operand_rank);
5641 optimize_ops_list (rhs_code, &ops);
5642 if (undistribute_ops_list (rhs_code, &ops,
5643 loop_containing_stmt (stmt)))
5645 ops.qsort (sort_by_operand_rank);
5646 optimize_ops_list (rhs_code, &ops);
5649 if (rhs_code == PLUS_EXPR
5650 && transform_add_to_multiply (&ops))
5651 ops.qsort (sort_by_operand_rank);
5653 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5655 if (is_vector)
5656 optimize_vec_cond_expr (rhs_code, &ops);
5657 else
5658 optimize_range_tests (rhs_code, &ops);
5661 if (rhs_code == MULT_EXPR && !is_vector)
5663 attempt_builtin_copysign (&ops);
5665 if (reassoc_insert_powi_p
5666 && flag_unsafe_math_optimizations)
5667 powi_result = attempt_builtin_powi (stmt, &ops);
5670 operand_entry *last;
5671 bool negate_result = false;
5672 if (ops.length () > 1
5673 && rhs_code == MULT_EXPR)
5675 last = ops.last ();
5676 if ((integer_minus_onep (last->op)
5677 || real_minus_onep (last->op))
5678 && !HONOR_SNANS (TREE_TYPE (lhs))
5679 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5680 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5682 ops.pop ();
5683 negate_result = true;
5687 tree new_lhs = lhs;
5688 /* If the operand vector is now empty, all operands were
5689 consumed by the __builtin_powi optimization. */
5690 if (ops.length () == 0)
5691 transform_stmt_to_copy (&gsi, stmt, powi_result);
5692 else if (ops.length () == 1)
5694 tree last_op = ops.last ()->op;
5696 /* If the stmt that defines operand has to be inserted, insert it
5697 before the use. */
5698 if (ops.last ()->stmt_to_insert)
5699 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5700 if (powi_result)
5701 transform_stmt_to_multiply (&gsi, stmt, last_op,
5702 powi_result);
5703 else
5704 transform_stmt_to_copy (&gsi, stmt, last_op);
5706 else
5708 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5709 int ops_num = ops.length ();
5710 int width = get_reassociation_width (ops_num, rhs_code, mode);
5712 if (dump_file && (dump_flags & TDF_DETAILS))
5713 fprintf (dump_file,
5714 "Width = %d was chosen for reassociation\n", width);
5716 if (width > 1
5717 && ops.length () > 3)
5718 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5719 width, ops);
5720 else
5722 /* When there are three operands left, we want
5723 to make sure the ones that get the double
5724 binary op are chosen wisely. */
5725 int len = ops.length ();
5726 if (len >= 3)
5727 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5729 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5730 powi_result != NULL
5731 || negate_result);
5734 /* If we combined some repeated factors into a
5735 __builtin_powi call, multiply that result by the
5736 reassociated operands. */
5737 if (powi_result)
5739 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5740 tree type = TREE_TYPE (lhs);
5741 tree target_ssa = make_temp_ssa_name (type, NULL,
5742 "reassocpow");
5743 gimple_set_lhs (lhs_stmt, target_ssa);
5744 update_stmt (lhs_stmt);
5745 if (lhs != new_lhs)
5747 target_ssa = new_lhs;
5748 new_lhs = lhs;
5750 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5751 powi_result, target_ssa);
5752 gimple_set_location (mul_stmt, gimple_location (stmt));
5753 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5754 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5758 if (negate_result)
5760 stmt = SSA_NAME_DEF_STMT (lhs);
5761 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5762 gimple_set_lhs (stmt, tmp);
5763 if (lhs != new_lhs)
5764 tmp = new_lhs;
5765 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5766 tmp);
5767 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5768 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5769 update_stmt (stmt);
5774 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5775 son;
5776 son = next_dom_son (CDI_POST_DOMINATORS, son))
5777 cfg_cleanup_needed |= reassociate_bb (son);
5779 return cfg_cleanup_needed;
5782 /* Add jumps around shifts for range tests turned into bit tests.
5783 For each SSA_NAME VAR we have code like:
5784 VAR = ...; // final stmt of range comparison
5785 // bit test here...;
5786 OTHERVAR = ...; // final stmt of the bit test sequence
5787 RES = VAR | OTHERVAR;
5788 Turn the above into:
5789 VAR = ...;
5790 if (VAR != 0)
5791 goto <l3>;
5792 else
5793 goto <l2>;
5794 <l2>:
5795 // bit test here...;
5796 OTHERVAR = ...;
5797 <l3>:
5798 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5800 static void
5801 branch_fixup (void)
5803 tree var;
5804 unsigned int i;
5806 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5808 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5809 gimple *use_stmt;
5810 use_operand_p use;
5811 bool ok = single_imm_use (var, &use, &use_stmt);
5812 gcc_assert (ok
5813 && is_gimple_assign (use_stmt)
5814 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5815 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5817 basic_block cond_bb = gimple_bb (def_stmt);
5818 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5819 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5821 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5822 gimple *g = gimple_build_cond (NE_EXPR, var,
5823 build_zero_cst (TREE_TYPE (var)),
5824 NULL_TREE, NULL_TREE);
5825 location_t loc = gimple_location (use_stmt);
5826 gimple_set_location (g, loc);
5827 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5829 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5830 etrue->probability = REG_BR_PROB_BASE / 2;
5831 etrue->count = cond_bb->count / 2;
5832 edge efalse = find_edge (cond_bb, then_bb);
5833 efalse->flags = EDGE_FALSE_VALUE;
5834 efalse->probability -= etrue->probability;
5835 efalse->count -= etrue->count;
5836 then_bb->count -= etrue->count;
5838 tree othervar = NULL_TREE;
5839 if (gimple_assign_rhs1 (use_stmt) == var)
5840 othervar = gimple_assign_rhs2 (use_stmt);
5841 else if (gimple_assign_rhs2 (use_stmt) == var)
5842 othervar = gimple_assign_rhs1 (use_stmt);
5843 else
5844 gcc_unreachable ();
5845 tree lhs = gimple_assign_lhs (use_stmt);
5846 gphi *phi = create_phi_node (lhs, merge_bb);
5847 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5848 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5849 gsi = gsi_for_stmt (use_stmt);
5850 gsi_remove (&gsi, true);
5852 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5853 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5855 reassoc_branch_fixups.release ();

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
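
/* DEBUG_FUNCTION keeps debug_ops_vector from being optimized away so it can
   be called by hand from a debugger, e.g. "call debug_ops_vector (ops)"
   while stopped inside this pass.  */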

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* RPO (reverse post order) will give us an order in which deeper
     loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
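
  /* The pass is instantiated more than once in passes.def; the boolean
     argument on each NEXT_PASS entry reaches the instance through
     set_pass_param and selects whether __builtin_powi insertion is
     enabled for that run.  */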
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}