gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
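/* As a small end-to-end illustration (the variable names below are
   invented for exposition, not taken from any testcase), a chain such as

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 + b;

   is handled as follows: step 1 rewrites t1 as a + (-b), step 2
   linearizes the additions into the operand list {a, -b, c, b},
   step 3 cancels the -b/b pair, and step 4 rewrites the remaining
   list back into a single addition of a and c.  */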
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs on which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block is
219 split right after that SSA_NAME's definition statement and before
220 its only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
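/* As an illustration (an invented accumulator loop, not from any
   testcase), for

     sum_1 = PHI <0 (preheader), sum_2 (latch)>
     sum_2 = sum_1 + a_5;

   in an innermost loop where the addition is the only use of sum_1,
   sum_1 gets the block rank of the latch plus PHI_LOOP_BIAS instead
   of the rank of the header block.  */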
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal; however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE (1 << 3)
474 #define FLOAT_CONST_TYPE (1 << 2)
475 #define OTHER_CONST_TYPE (1 << 1)
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied/whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id > oea->id ? 1 : -1;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if (oeb->rank == oea->rank
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
518 versions of removed SSA_NAMEs, so if possible, prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
547 else
548 return oeb->id > oea->id ? 1 : -1;
551 if (oeb->rank != oea->rank)
552 return oeb->rank > oea->rank ? 1 : -1;
553 else
554 return oeb->id > oea->id ? 1 : -1;
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is a reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
609 tree rhs1 = gimple_assign_rhs1 (stmt);
610 tree rhs2 = gimple_assign_rhs2 (stmt);
611 if (TREE_CODE (rhs1) == SSA_NAME
612 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
613 return false;
614 if (rhs2
615 && TREE_CODE (rhs2) == SSA_NAME
616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
617 return false;
618 return true;
621 return false;
625 /* Return true if STMT is a nop-conversion. */
627 static bool
628 gimple_nop_conversion_p (gimple *stmt)
630 if (gassign *ass = dyn_cast <gassign *> (stmt))
632 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
633 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
634 TREE_TYPE (gimple_assign_rhs1 (ass))))
635 return true;
637 return false;
640 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
641 operand of the negate operation. Otherwise, return NULL. */
643 static tree
644 get_unary_op (tree name, enum tree_code opcode)
646 gimple *stmt = SSA_NAME_DEF_STMT (name);
648 /* Look through nop conversions (sign changes). */
649 if (gimple_nop_conversion_p (stmt)
650 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
651 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
653 if (!is_gimple_assign (stmt))
654 return NULL_TREE;
656 if (gimple_assign_rhs_code (stmt) == opcode)
657 return gimple_assign_rhs1 (stmt);
658 return NULL_TREE;
661 /* Return true if OP1 and OP2 have the same value if cast to either type. */
663 static bool
664 ops_equal_values_p (tree op1, tree op2)
666 if (op1 == op2)
667 return true;
669 tree orig_op1 = op1;
670 if (TREE_CODE (op1) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op1);
673 if (gimple_nop_conversion_p (stmt))
675 op1 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2)
677 return true;
681 if (TREE_CODE (op2) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op2);
684 if (gimple_nop_conversion_p (stmt))
686 op2 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2
688 || orig_op1 == op2)
689 return true;
693 return false;
697 /* If CURR and LAST are a pair of ops that OPCODE allows us to
698 eliminate through equivalences, do so, remove them from OPS, and
699 return true. Otherwise, return false. */
701 static bool
702 eliminate_duplicate_pair (enum tree_code opcode,
703 vec<operand_entry *> *ops,
704 bool *all_done,
705 unsigned int i,
706 operand_entry *curr,
707 operand_entry *last)
710 /* If we have two of the same op, and the opcode is &, |, min, or max,
711 we can eliminate one of them.
712 If we have two of the same op, and the opcode is ^, we can
713 eliminate both of them. */
715 if (last && last->op == curr->op)
717 switch (opcode)
719 case MAX_EXPR:
720 case MIN_EXPR:
721 case BIT_IOR_EXPR:
722 case BIT_AND_EXPR:
723 if (dump_file && (dump_flags & TDF_DETAILS))
725 fprintf (dump_file, "Equivalence: ");
726 print_generic_expr (dump_file, curr->op);
727 fprintf (dump_file, " [&|minmax] ");
728 print_generic_expr (dump_file, last->op);
729 fprintf (dump_file, " -> ");
730 print_generic_stmt (dump_file, last->op);
733 ops->ordered_remove (i);
734 reassociate_stats.ops_eliminated ++;
736 return true;
738 case BIT_XOR_EXPR:
739 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "Equivalence: ");
742 print_generic_expr (dump_file, curr->op);
743 fprintf (dump_file, " ^ ");
744 print_generic_expr (dump_file, last->op);
745 fprintf (dump_file, " -> nothing\n");
748 reassociate_stats.ops_eliminated += 2;
750 if (ops->length () == 2)
752 ops->truncate (0);
753 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
754 *all_done = true;
756 else
758 ops->ordered_remove (i-1);
759 ops->ordered_remove (i-1);
762 return true;
764 default:
765 break;
768 return false;
771 static vec<tree> plus_negates;
773 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
774 expression, look in OPS for a corresponding positive operation to cancel
775 it out. If we find one, remove the other from OPS, replace
776 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
777 return false. */
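/* For example (hypothetical operands), in a PLUS_EXPR operand list
   containing both x_1 and t_2 = -x_1 the pair is replaced by 0, and
   with t_2 = ~x_1 it is replaced by -1, matching -x + x == 0 and
   ~x + x == -1 for integer x.  */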
779 static bool
780 eliminate_plus_minus_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 unsigned int currindex,
783 operand_entry *curr)
785 tree negateop;
786 tree notop;
787 unsigned int i;
788 operand_entry *oe;
790 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
791 return false;
793 negateop = get_unary_op (curr->op, NEGATE_EXPR);
794 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
795 if (negateop == NULL_TREE && notop == NULL_TREE)
796 return false;
798 /* Any non-negated version will have a rank that is one less than
799 the current rank. So once we hit those ranks, if we don't find
800 one, we can stop. */
802 for (i = currindex + 1;
803 ops->iterate (i, &oe)
804 && oe->rank >= curr->rank - 1 ;
805 i++)
807 if (negateop
808 && ops_equal_values_p (oe->op, negateop))
810 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Equivalence: ");
813 print_generic_expr (dump_file, negateop);
814 fprintf (dump_file, " + -");
815 print_generic_expr (dump_file, oe->op);
816 fprintf (dump_file, " -> 0\n");
819 ops->ordered_remove (i);
820 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
821 ops->ordered_remove (currindex);
822 reassociate_stats.ops_eliminated ++;
824 return true;
826 else if (notop
827 && ops_equal_values_p (oe->op, notop))
829 tree op_type = TREE_TYPE (oe->op);
831 if (dump_file && (dump_flags & TDF_DETAILS))
833 fprintf (dump_file, "Equivalence: ");
834 print_generic_expr (dump_file, notop);
835 fprintf (dump_file, " + ~");
836 print_generic_expr (dump_file, oe->op);
837 fprintf (dump_file, " -> -1\n");
840 ops->ordered_remove (i);
841 add_to_ops_vec (ops, build_all_ones_cst (op_type));
842 ops->ordered_remove (currindex);
843 reassociate_stats.ops_eliminated ++;
845 return true;
849 /* If CURR->OP is a negate expr (without a nop conversion) in a plus expr,
850 save it for later inspection in repropagate_negates (). */
851 if (negateop != NULL_TREE
852 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
853 plus_negates.safe_push (curr->op);
855 return false;
858 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
859 bitwise not expression, look in OPS for a corresponding operand to
860 cancel it out. If we find one, remove the other from OPS, replace
861 OPS[CURRINDEX] with 0, and return true. Otherwise, return
862 false. */
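/* For example (hypothetical operands), a BIT_AND_EXPR operand list
   containing both x_1 and t_2 = ~x_1 collapses to 0, and a
   BIT_IOR_EXPR list collapses to all ones, since x & ~x == 0 and
   x | ~x == -1.  */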
864 static bool
865 eliminate_not_pairs (enum tree_code opcode,
866 vec<operand_entry *> *ops,
867 unsigned int currindex,
868 operand_entry *curr)
870 tree notop;
871 unsigned int i;
872 operand_entry *oe;
874 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
875 || TREE_CODE (curr->op) != SSA_NAME)
876 return false;
878 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
879 if (notop == NULL_TREE)
880 return false;
882 /* Any non-not version will have a rank that is one less than
883 the current rank. So once we hit those ranks, if we don't find
884 one, we can stop. */
886 for (i = currindex + 1;
887 ops->iterate (i, &oe)
888 && oe->rank >= curr->rank - 1;
889 i++)
891 if (oe->op == notop)
893 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Equivalence: ");
896 print_generic_expr (dump_file, notop);
897 if (opcode == BIT_AND_EXPR)
898 fprintf (dump_file, " & ~");
899 else if (opcode == BIT_IOR_EXPR)
900 fprintf (dump_file, " | ~");
901 print_generic_expr (dump_file, oe->op);
902 if (opcode == BIT_AND_EXPR)
903 fprintf (dump_file, " -> 0\n");
904 else if (opcode == BIT_IOR_EXPR)
905 fprintf (dump_file, " -> -1\n");
908 if (opcode == BIT_AND_EXPR)
909 oe->op = build_zero_cst (TREE_TYPE (oe->op));
910 else if (opcode == BIT_IOR_EXPR)
911 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
913 reassociate_stats.ops_eliminated += ops->length () - 1;
914 ops->truncate (0);
915 ops->quick_push (oe);
916 return true;
920 return false;
923 /* Use constant value that may be present in OPS to try to eliminate
924 operands. Note that this function is only really used when we've
925 eliminated ops for other reasons, or merged constants. Across
926 single statements, fold already does all of this, plus more. There
927 is little point in duplicating logic, so I've only included the
928 identities that I could ever construct testcases to trigger. */
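/* For example (illustrative only), a trailing & 0 or | -1 collapses
   the whole operand list to that constant, a trailing * 0 does the
   same for a MULT_EXPR chain (subject to the FP checks below), and
   trailing & -1, | 0, * 1, ^ 0 and + 0 operands are simply dropped.  */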
930 static void
931 eliminate_using_constants (enum tree_code opcode,
932 vec<operand_entry *> *ops)
934 operand_entry *oelast = ops->last ();
935 tree type = TREE_TYPE (oelast->op);
937 if (oelast->rank == 0
938 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
940 switch (opcode)
942 case BIT_AND_EXPR:
943 if (integer_zerop (oelast->op))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found & 0, removing all other ops\n");
950 reassociate_stats.ops_eliminated += ops->length () - 1;
952 ops->truncate (0);
953 ops->quick_push (oelast);
954 return;
957 else if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found & -1, removing\n");
963 ops->pop ();
964 reassociate_stats.ops_eliminated++;
967 break;
968 case BIT_IOR_EXPR:
969 if (integer_all_onesp (oelast->op))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found | -1, removing all other ops\n");
976 reassociate_stats.ops_eliminated += ops->length () - 1;
978 ops->truncate (0);
979 ops->quick_push (oelast);
980 return;
983 else if (integer_zerop (oelast->op))
985 if (ops->length () != 1)
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found | 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
993 break;
994 case MULT_EXPR:
995 if (integer_zerop (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_NANS (type)
998 && !HONOR_SIGNED_ZEROS (type)
999 && real_zerop (oelast->op)))
1001 if (ops->length () != 1)
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "Found * 0, removing all other ops\n");
1006 reassociate_stats.ops_eliminated += ops->length () - 1;
1007 ops->truncate (1);
1008 ops->quick_push (oelast);
1009 return;
1012 else if (integer_onep (oelast->op)
1013 || (FLOAT_TYPE_P (type)
1014 && !HONOR_SNANS (type)
1015 && real_onep (oelast->op)))
1017 if (ops->length () != 1)
1019 if (dump_file && (dump_flags & TDF_DETAILS))
1020 fprintf (dump_file, "Found * 1, removing\n");
1021 ops->pop ();
1022 reassociate_stats.ops_eliminated++;
1023 return;
1026 break;
1027 case BIT_XOR_EXPR:
1028 case PLUS_EXPR:
1029 case MINUS_EXPR:
1030 if (integer_zerop (oelast->op)
1031 || (FLOAT_TYPE_P (type)
1032 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1033 && fold_real_zero_addition_p (type, oelast->op,
1034 opcode == MINUS_EXPR)))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found [|^+] 0, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1042 return;
1045 break;
1046 default:
1047 break;
1053 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1054 bool, bool);
1056 /* Structure for tracking and counting operands. */
1057 struct oecount {
1058 unsigned int cnt;
1059 unsigned int id;
1060 enum tree_code oecode;
1061 tree op;
1065 /* The heap for the oecount hashtable and the sorted list of operands. */
1066 static vec<oecount> cvec;
1069 /* Oecount hashtable helpers. */
1071 struct oecount_hasher : int_hash <int, 0, 1>
1073 static inline hashval_t hash (int);
1074 static inline bool equal (int, int);
1077 /* Hash function for oecount. */
1079 inline hashval_t
1080 oecount_hasher::hash (int p)
1082 const oecount *c = &cvec[p - 42];
1083 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1086 /* Comparison function for oecount. */
1088 inline bool
1089 oecount_hasher::equal (int p1, int p2)
1091 const oecount *c1 = &cvec[p1 - 42];
1092 const oecount *c2 = &cvec[p2 - 42];
1093 return c1->oecode == c2->oecode && c1->op == c2->op;
1096 /* Comparison function for qsort sorting oecount elements by count. */
1098 static int
1099 oecount_cmp (const void *p1, const void *p2)
1101 const oecount *c1 = (const oecount *)p1;
1102 const oecount *c2 = (const oecount *)p2;
1103 if (c1->cnt != c2->cnt)
1104 return c1->cnt > c2->cnt ? 1 : -1;
1105 else
1106 /* If counts are identical, use unique IDs to stabilize qsort. */
1107 return c1->id > c2->id ? 1 : -1;
1110 /* Return TRUE iff STMT represents a builtin call that raises OP
1111 to some exponent. */
1113 static bool
1114 stmt_is_power_of_op (gimple *stmt, tree op)
1116 if (!is_gimple_call (stmt))
1117 return false;
1119 switch (gimple_call_combined_fn (stmt))
1121 CASE_CFN_POW:
1122 CASE_CFN_POWI:
1123 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1125 default:
1126 return false;
1130 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1131 in place and return the result. Assumes that stmt_is_power_of_op
1132 was previously called for STMT and returned TRUE. */
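/* For example (illustrative only), for a call __builtin_powi (x, 5)
   the exponent operand is rewritten to 4 in place and 4 is
   returned.  */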
1134 static HOST_WIDE_INT
1135 decrement_power (gimple *stmt)
1137 REAL_VALUE_TYPE c, cint;
1138 HOST_WIDE_INT power;
1139 tree arg1;
1141 switch (gimple_call_combined_fn (stmt))
1143 CASE_CFN_POW:
1144 arg1 = gimple_call_arg (stmt, 1);
1145 c = TREE_REAL_CST (arg1);
1146 power = real_to_integer (&c) - 1;
1147 real_from_integer (&cint, VOIDmode, power, SIGNED);
1148 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1149 return power;
1151 CASE_CFN_POWI:
1152 arg1 = gimple_call_arg (stmt, 1);
1153 power = TREE_INT_CST_LOW (arg1) - 1;
1154 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1155 return power;
1157 default:
1158 gcc_unreachable ();
1162 /* Create a new SSA name for the LHS of STMT, replace all uses of the
1163 old LHS with the new name, and return the new SSA name. */
1165 static tree
1166 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1168 gimple *use_stmt;
1169 use_operand_p use;
1170 imm_use_iterator iter;
1171 tree new_lhs, new_debug_lhs = NULL_TREE;
1172 tree lhs = gimple_get_lhs (stmt);
1174 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1175 gimple_set_lhs (stmt, new_lhs);
1177 /* Also need to update GIMPLE_DEBUGs. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1180 tree repl = new_lhs;
1181 if (is_gimple_debug (use_stmt))
1183 if (new_debug_lhs == NULL_TREE)
1185 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1186 gdebug *def_temp
1187 = gimple_build_debug_bind (new_debug_lhs,
1188 build2 (opcode, TREE_TYPE (lhs),
1189 new_lhs, op),
1190 stmt);
1191 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1192 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1193 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1194 gimple_set_uid (def_temp, gimple_uid (stmt));
1195 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1196 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1198 repl = new_debug_lhs;
1200 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1201 SET_USE (use, repl);
1202 update_stmt (use_stmt);
1204 return new_lhs;
1207 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1208 uses with new SSA names. Also do this for the stmt that defines DEF
1209 if *DEF is not OP. */
1211 static void
1212 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1213 vec<gimple *> &stmts_to_fix)
1215 unsigned i;
1216 gimple *stmt;
1218 if (*def != op
1219 && TREE_CODE (*def) == SSA_NAME
1220 && (stmt = SSA_NAME_DEF_STMT (*def))
1221 && gimple_code (stmt) != GIMPLE_NOP)
1222 *def = make_new_ssa_for_def (stmt, opcode, op);
1224 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1225 make_new_ssa_for_def (stmt, opcode, op);
1228 /* Find the single immediate use of STMT's LHS, and replace it
1229 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1230 replace *DEF with OP as well. */
1232 static void
1233 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1235 tree lhs;
1236 gimple *use_stmt;
1237 use_operand_p use;
1238 gimple_stmt_iterator gsi;
1240 if (is_gimple_call (stmt))
1241 lhs = gimple_call_lhs (stmt);
1242 else
1243 lhs = gimple_assign_lhs (stmt);
1245 gcc_assert (has_single_use (lhs));
1246 single_imm_use (lhs, &use, &use_stmt);
1247 if (lhs == *def)
1248 *def = op;
1249 SET_USE (use, op);
1250 if (TREE_CODE (op) != SSA_NAME)
1251 update_stmt (use_stmt);
1252 gsi = gsi_for_stmt (stmt);
1253 unlink_stmt_vdef (stmt);
1254 reassoc_remove_stmt (&gsi);
1255 release_defs (stmt);
1258 /* Walks the linear chain with result *DEF searching for an operation
1259 with operand OP and code OPCODE removing that from the chain. *DEF
1260 is updated if there is only one operand but no operation left. */
1262 static void
1263 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1265 tree orig_def = *def;
1266 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1267 /* PR72835 - Record the stmt chain that has to be updated such that
1268 we don't use the same LHS when the values computed are different. */
1269 auto_vec<gimple *, 64> stmts_to_fix;
1273 tree name;
1275 if (opcode == MULT_EXPR)
1277 if (stmt_is_power_of_op (stmt, op))
1279 if (decrement_power (stmt) == 1)
1281 if (stmts_to_fix.length () > 0)
1282 stmts_to_fix.pop ();
1283 propagate_op_to_single_use (op, stmt, def);
1285 break;
1287 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1289 if (gimple_assign_rhs1 (stmt) == op)
1291 tree cst = build_minus_one_cst (TREE_TYPE (op));
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (cst, stmt, def);
1295 break;
1297 else if (integer_minus_onep (op)
1298 || real_minus_onep (op))
1300 gimple_assign_set_rhs_code
1301 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1302 break;
1307 name = gimple_assign_rhs1 (stmt);
1309 /* If this is the operation we look for and one of the operands
1310 is ours simply propagate the other operand into the stmts
1311 single use. */
1312 if (gimple_assign_rhs_code (stmt) == opcode
1313 && (name == op
1314 || gimple_assign_rhs2 (stmt) == op))
1316 if (name == op)
1317 name = gimple_assign_rhs2 (stmt);
1318 if (stmts_to_fix.length () > 0)
1319 stmts_to_fix.pop ();
1320 propagate_op_to_single_use (name, stmt, def);
1321 break;
1324 /* We might have a multiply of two __builtin_pow* calls, and
1325 the operand might be hiding in the rightmost one. Likewise
1326 this can happen for a negate. */
1327 if (opcode == MULT_EXPR
1328 && gimple_assign_rhs_code (stmt) == opcode
1329 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1330 && has_single_use (gimple_assign_rhs2 (stmt)))
1332 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1333 if (stmt_is_power_of_op (stmt2, op))
1335 if (decrement_power (stmt2) == 1)
1336 propagate_op_to_single_use (op, stmt2, def);
1337 else
1338 stmts_to_fix.safe_push (stmt2);
1339 break;
1341 else if (is_gimple_assign (stmt2)
1342 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1344 if (gimple_assign_rhs1 (stmt2) == op)
1346 tree cst = build_minus_one_cst (TREE_TYPE (op));
1347 propagate_op_to_single_use (cst, stmt2, def);
1348 break;
1350 else if (integer_minus_onep (op)
1351 || real_minus_onep (op))
1353 stmts_to_fix.safe_push (stmt2);
1354 gimple_assign_set_rhs_code
1355 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1356 break;
1361 /* Continue walking the chain. */
1362 gcc_assert (name != op
1363 && TREE_CODE (name) == SSA_NAME);
1364 stmt = SSA_NAME_DEF_STMT (name);
1365 stmts_to_fix.safe_push (stmt);
1367 while (1);
1369 if (stmts_to_fix.length () > 0 || *def == orig_def)
1370 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1373 /* Returns true if statement S1 dominates statement S2. Like
1374 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1376 static bool
1377 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1379 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1381 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1382 SSA_NAME. Assume it lives at the beginning of function and
1383 thus dominates everything. */
1384 if (!bb1 || s1 == s2)
1385 return true;
1387 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1388 if (!bb2)
1389 return false;
1391 if (bb1 == bb2)
1393 /* PHIs in the same basic block are assumed to be
1394 executed all in parallel; if only one stmt is a PHI,
1395 it dominates the other stmt in the same basic block. */
1396 if (gimple_code (s1) == GIMPLE_PHI)
1397 return true;
1399 if (gimple_code (s2) == GIMPLE_PHI)
1400 return false;
1402 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1404 if (gimple_uid (s1) < gimple_uid (s2))
1405 return true;
1407 if (gimple_uid (s1) > gimple_uid (s2))
1408 return false;
1410 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1411 unsigned int uid = gimple_uid (s1);
1412 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1414 gimple *s = gsi_stmt (gsi);
1415 if (gimple_uid (s) != uid)
1416 break;
1417 if (s == s2)
1418 return true;
1421 return false;
1424 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1427 /* Insert STMT after INSERT_POINT. */
1429 static void
1430 insert_stmt_after (gimple *stmt, gimple *insert_point)
1432 gimple_stmt_iterator gsi;
1433 basic_block bb;
1435 if (gimple_code (insert_point) == GIMPLE_PHI)
1436 bb = gimple_bb (insert_point);
1437 else if (!stmt_ends_bb_p (insert_point))
1439 gsi = gsi_for_stmt (insert_point);
1440 gimple_set_uid (stmt, gimple_uid (insert_point));
1441 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1442 return;
1444 else
1445 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1446 thus if it must end a basic block, it should be a call that can
1447 throw, or some assignment that can throw. If it throws, the LHS
1448 of it will not be initialized though, so only valid places using
1449 the SSA_NAME should be dominated by the fallthru edge. */
1450 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1451 gsi = gsi_after_labels (bb);
1452 if (gsi_end_p (gsi))
1454 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1455 gimple_set_uid (stmt,
1456 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1458 else
1459 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1460 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1463 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1464 the result. Places the statement after the definition of either
1465 OP1 or OP2. Returns the new statement. */
1467 static gimple *
1468 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1470 gimple *op1def = NULL, *op2def = NULL;
1471 gimple_stmt_iterator gsi;
1472 tree op;
1473 gassign *sum;
1475 /* Create the addition statement. */
1476 op = make_ssa_name (type);
1477 sum = gimple_build_assign (op, opcode, op1, op2);
1479 /* Find an insertion place and insert. */
1480 if (TREE_CODE (op1) == SSA_NAME)
1481 op1def = SSA_NAME_DEF_STMT (op1);
1482 if (TREE_CODE (op2) == SSA_NAME)
1483 op2def = SSA_NAME_DEF_STMT (op2);
1484 if ((!op1def || gimple_nop_p (op1def))
1485 && (!op2def || gimple_nop_p (op2def)))
1487 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1488 if (gsi_end_p (gsi))
1490 gimple_stmt_iterator gsi2
1491 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1492 gimple_set_uid (sum,
1493 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1495 else
1496 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1497 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1499 else
1501 gimple *insert_point;
1502 if ((!op1def || gimple_nop_p (op1def))
1503 || (op2def && !gimple_nop_p (op2def)
1504 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1505 insert_point = op2def;
1506 else
1507 insert_point = op1def;
1508 insert_stmt_after (sum, insert_point);
1510 update_stmt (sum);
1512 return sum;
1515 /* Perform un-distribution of divisions and multiplications.
1516 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1517 to (A + B) / X for real X.
1519 The algorithm is organized as follows.
1521 - First we walk the addition chain *OPS looking for summands that
1522 are defined by a multiplication or a real division. This results
1523 in the candidates bitmap with relevant indices into *OPS.
1525 - Second we build the chains of multiplications or divisions for
1526 these candidates, counting the number of occurrences of (operand, code)
1527 pairs in all of the candidates chains.
1529 - Third we sort the (operand, code) pairs by number of occurrence and
1530 process them starting with the pair with the most uses.
1532 * For each such pair we walk the candidates again to build a
1533 second candidate bitmap noting all multiplication/division chains
1534 that have at least one occurrence of (operand, code).
1536 * We build an alternate addition chain only covering these
1537 candidates with one (operand, code) operation removed from their
1538 multiplication/division chain.
1540 * The first candidate gets replaced by the alternate addition chain
1541 multiplied/divided by the operand.
1543 * All candidate chains get disabled for further processing and
1544 processing of (operand, code) pairs continues.
1546 The alternate addition chains built are re-processed by the main
1547 reassociation algorithm, which allows optimizing a * x * y + b * y * x
1548 to (a + b) * x * y in one invocation of the reassociation pass. */
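/* A minimal illustration (operand names invented for exposition): for
   the addition chain {a*x, b*x, c} the pair (x, MULT_EXPR) occurs
   twice, so an alternate chain a + b is built, multiplied by x, and
   the outer chain becomes {(a + b) * x, 0, c}.  */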
1550 static bool
1551 undistribute_ops_list (enum tree_code opcode,
1552 vec<operand_entry *> *ops, struct loop *loop)
1554 unsigned int length = ops->length ();
1555 operand_entry *oe1;
1556 unsigned i, j;
1557 unsigned nr_candidates, nr_candidates2;
1558 sbitmap_iterator sbi0;
1559 vec<operand_entry *> *subops;
1560 bool changed = false;
1561 unsigned int next_oecount_id = 0;
1563 if (length <= 1
1564 || opcode != PLUS_EXPR)
1565 return false;
1567 /* Build a list of candidates to process. */
1568 auto_sbitmap candidates (length);
1569 bitmap_clear (candidates);
1570 nr_candidates = 0;
1571 FOR_EACH_VEC_ELT (*ops, i, oe1)
1573 enum tree_code dcode;
1574 gimple *oe1def;
1576 if (TREE_CODE (oe1->op) != SSA_NAME)
1577 continue;
1578 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1579 if (!is_gimple_assign (oe1def))
1580 continue;
1581 dcode = gimple_assign_rhs_code (oe1def);
1582 if ((dcode != MULT_EXPR
1583 && dcode != RDIV_EXPR)
1584 || !is_reassociable_op (oe1def, dcode, loop))
1585 continue;
1587 bitmap_set_bit (candidates, i);
1588 nr_candidates++;
1591 if (nr_candidates < 2)
1592 return false;
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "searching for un-distribute opportunities ");
1597 print_generic_expr (dump_file,
1598 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1599 fprintf (dump_file, " %d\n", nr_candidates);
1602 /* Build linearized sub-operand lists and the counting table. */
1603 cvec.create (0);
1605 hash_table<oecount_hasher> ctable (15);
1607 /* ??? Macro arguments cannot have multi-argument template types in
1608 them. This typedef is needed to work around that limitation. */
1609 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1610 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1611 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1613 gimple *oedef;
1614 enum tree_code oecode;
1615 unsigned j;
1617 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1618 oecode = gimple_assign_rhs_code (oedef);
1619 linearize_expr_tree (&subops[i], oedef,
1620 associative_tree_code (oecode), false);
1622 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1624 oecount c;
1625 int *slot;
1626 int idx;
1627 c.oecode = oecode;
1628 c.cnt = 1;
1629 c.id = next_oecount_id++;
1630 c.op = oe1->op;
1631 cvec.safe_push (c);
1632 idx = cvec.length () + 41;
1633 slot = ctable.find_slot (idx, INSERT);
1634 if (!*slot)
1636 *slot = idx;
1638 else
1640 cvec.pop ();
1641 cvec[*slot - 42].cnt++;
1646 /* Sort the counting table. */
1647 cvec.qsort (oecount_cmp);
1649 if (dump_file && (dump_flags & TDF_DETAILS))
1651 oecount *c;
1652 fprintf (dump_file, "Candidates:\n");
1653 FOR_EACH_VEC_ELT (cvec, j, c)
1655 fprintf (dump_file, " %u %s: ", c->cnt,
1656 c->oecode == MULT_EXPR
1657 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1658 print_generic_expr (dump_file, c->op);
1659 fprintf (dump_file, "\n");
1663 /* Process the (operand, code) pairs in order of most occurrence. */
1664 auto_sbitmap candidates2 (length);
1665 while (!cvec.is_empty ())
1667 oecount *c = &cvec.last ();
1668 if (c->cnt < 2)
1669 break;
1671 /* Now collect the operands in the outer chain that contain
1672 the common operand in their inner chain. */
1673 bitmap_clear (candidates2);
1674 nr_candidates2 = 0;
1675 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1677 gimple *oedef;
1678 enum tree_code oecode;
1679 unsigned j;
1680 tree op = (*ops)[i]->op;
1682 /* If we undistributed in this chain already, this may be
1683 a constant. */
1684 if (TREE_CODE (op) != SSA_NAME)
1685 continue;
1687 oedef = SSA_NAME_DEF_STMT (op);
1688 oecode = gimple_assign_rhs_code (oedef);
1689 if (oecode != c->oecode)
1690 continue;
1692 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1694 if (oe1->op == c->op)
1696 bitmap_set_bit (candidates2, i);
1697 ++nr_candidates2;
1698 break;
1703 if (nr_candidates2 >= 2)
1705 operand_entry *oe1, *oe2;
1706 gimple *prod;
1707 int first = bitmap_first_set_bit (candidates2);
1709 /* Build the new addition chain. */
1710 oe1 = (*ops)[first];
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Building (");
1714 print_generic_expr (dump_file, oe1->op);
1716 zero_one_operation (&oe1->op, c->oecode, c->op);
1717 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1719 gimple *sum;
1720 oe2 = (*ops)[i];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, " + ");
1724 print_generic_expr (dump_file, oe2->op);
1726 zero_one_operation (&oe2->op, c->oecode, c->op);
1727 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1728 oe1->op, oe2->op, opcode);
1729 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1730 oe2->rank = 0;
1731 oe1->op = gimple_get_lhs (sum);
1734 /* Apply the multiplication/division. */
1735 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1736 oe1->op, c->op, c->oecode);
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1739 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1740 print_generic_expr (dump_file, c->op);
1741 fprintf (dump_file, "\n");
1744 /* Record it in the addition chain and disable further
1745 undistribution with this op. */
1746 oe1->op = gimple_assign_lhs (prod);
1747 oe1->rank = get_rank (oe1->op);
1748 subops[first].release ();
1750 changed = true;
1753 cvec.pop ();
1756 for (i = 0; i < ops->length (); ++i)
1757 subops[i].release ();
1758 free (subops);
1759 cvec.release ();
1761 return changed;
1764 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1765 expression, examine the other OPS to see if any of them are comparisons
1766 of the same values, which we may be able to combine or eliminate.
1767 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
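/* If the combination folds to a constant instead, e.g. (a < b) & (a > b)
   (operands purely illustrative) folding to false, the constant is
   appended to the operand list rather than substituted in place; see
   the INTEGER_CST handling below.  */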
1769 static bool
1770 eliminate_redundant_comparison (enum tree_code opcode,
1771 vec<operand_entry *> *ops,
1772 unsigned int currindex,
1773 operand_entry *curr)
1775 tree op1, op2;
1776 enum tree_code lcode, rcode;
1777 gimple *def1, *def2;
1778 int i;
1779 operand_entry *oe;
1781 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1782 return false;
1784 /* Check that CURR is a comparison. */
1785 if (TREE_CODE (curr->op) != SSA_NAME)
1786 return false;
1787 def1 = SSA_NAME_DEF_STMT (curr->op);
1788 if (!is_gimple_assign (def1))
1789 return false;
1790 lcode = gimple_assign_rhs_code (def1);
1791 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1792 return false;
1793 op1 = gimple_assign_rhs1 (def1);
1794 op2 = gimple_assign_rhs2 (def1);
1796 /* Now look for a similar comparison in the remaining OPS. */
1797 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1799 tree t;
1801 if (TREE_CODE (oe->op) != SSA_NAME)
1802 continue;
1803 def2 = SSA_NAME_DEF_STMT (oe->op);
1804 if (!is_gimple_assign (def2))
1805 continue;
1806 rcode = gimple_assign_rhs_code (def2);
1807 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1808 continue;
1810 /* If we got here, we have a match. See if we can combine the
1811 two comparisons. */
1812 if (opcode == BIT_IOR_EXPR)
1813 t = maybe_fold_or_comparisons (lcode, op1, op2,
1814 rcode, gimple_assign_rhs1 (def2),
1815 gimple_assign_rhs2 (def2));
1816 else
1817 t = maybe_fold_and_comparisons (lcode, op1, op2,
1818 rcode, gimple_assign_rhs1 (def2),
1819 gimple_assign_rhs2 (def2));
1820 if (!t)
1821 continue;
1823 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1824 always give us a boolean_type_node value back. If the original
1825 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1826 we need to convert. */
1827 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1828 t = fold_convert (TREE_TYPE (curr->op), t);
1830 if (TREE_CODE (t) != INTEGER_CST
1831 && !operand_equal_p (t, curr->op, 0))
1833 enum tree_code subcode;
1834 tree newop1, newop2;
1835 if (!COMPARISON_CLASS_P (t))
1836 continue;
1837 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1838 STRIP_USELESS_TYPE_CONVERSION (newop1);
1839 STRIP_USELESS_TYPE_CONVERSION (newop2);
1840 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1841 continue;
1844 if (dump_file && (dump_flags & TDF_DETAILS))
1846 fprintf (dump_file, "Equivalence: ");
1847 print_generic_expr (dump_file, curr->op);
1848 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1849 print_generic_expr (dump_file, oe->op);
1850 fprintf (dump_file, " -> ");
1851 print_generic_expr (dump_file, t);
1852 fprintf (dump_file, "\n");
1855 /* Now we can delete oe, as it has been subsumed by the new combined
1856 expression t. */
1857 ops->ordered_remove (i);
1858 reassociate_stats.ops_eliminated ++;
1860 /* If t is the same as curr->op, we're done. Otherwise we must
1861 replace curr->op with t. Special case is if we got a constant
1862 back, in which case we add it to the end instead of in place of
1863 the current entry. */
1864 if (TREE_CODE (t) == INTEGER_CST)
1866 ops->ordered_remove (currindex);
1867 add_to_ops_vec (ops, t);
1869 else if (!operand_equal_p (t, curr->op, 0))
1871 gimple *sum;
1872 enum tree_code subcode;
1873 tree newop1;
1874 tree newop2;
1875 gcc_assert (COMPARISON_CLASS_P (t));
1876 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1877 STRIP_USELESS_TYPE_CONVERSION (newop1);
1878 STRIP_USELESS_TYPE_CONVERSION (newop2);
1879 gcc_checking_assert (is_gimple_val (newop1)
1880 && is_gimple_val (newop2));
1881 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1882 curr->op = gimple_get_lhs (sum);
1884 return true;
1887 return false;
1891 /* Transform repeated addition of the same value into a multiplication
1892 by a constant. */
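/* For example (hypothetical operand list), {x, x, x, y} under
   PLUS_EXPR becomes, roughly, {y, x * 3}: the three equal operands
   are removed and a MULT_EXPR by the repeat count is appended instead
   (for floats this is only done with -funsafe-math-optimizations).  */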
1893 static bool
1894 transform_add_to_multiply (vec<operand_entry *> *ops)
1896 operand_entry *oe;
1897 tree op = NULL_TREE;
1898 int j;
1899 int i, start = -1, end = 0, count = 0;
1900 auto_vec<std::pair <int, int> > indxs;
1901 bool changed = false;
1903 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1904 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1905 || !flag_unsafe_math_optimizations))
1906 return false;
1908 /* Look for repeated operands. */
1909 FOR_EACH_VEC_ELT (*ops, i, oe)
1911 if (start == -1)
1913 count = 1;
1914 op = oe->op;
1915 start = i;
1917 else if (operand_equal_p (oe->op, op, 0))
1919 count++;
1920 end = i;
1922 else
1924 if (count > 1)
1925 indxs.safe_push (std::make_pair (start, end));
1926 count = 1;
1927 op = oe->op;
1928 start = i;
1932 if (count > 1)
1933 indxs.safe_push (std::make_pair (start, end));
1935 for (j = indxs.length () - 1; j >= 0; --j)
1937 /* Convert repeated operand addition to multiplication. */
1938 start = indxs[j].first;
1939 end = indxs[j].second;
1940 op = (*ops)[start]->op;
1941 count = end - start + 1;
1942 for (i = end; i >= start; --i)
1943 ops->unordered_remove (i);
1944 tree tmp = make_ssa_name (TREE_TYPE (op));
1945 tree cst = build_int_cst (integer_type_node, count);
1946 gassign *mul_stmt
1947 = gimple_build_assign (tmp, MULT_EXPR,
1948 op, fold_convert (TREE_TYPE (op), cst));
1949 gimple_set_visited (mul_stmt, true);
1950 add_to_ops_vec (ops, tmp, mul_stmt);
1951 changed = true;
1954 return changed;
1958 /* Perform various identities and other optimizations on the list of
1959 operand entries, stored in OPS. The tree code for the binary
1960 operation between all the operands is OPCODE. */
1962 static void
1963 optimize_ops_list (enum tree_code opcode,
1964 vec<operand_entry *> *ops)
1966 unsigned int length = ops->length ();
1967 unsigned int i;
1968 operand_entry *oe;
1969 operand_entry *oelast = NULL;
1970 bool iterate = false;
1972 if (length == 1)
1973 return;
1975 oelast = ops->last ();
1977 /* If the last two are constants, pop the constants off, merge them
1978 and try the next two. */
1979 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1981 operand_entry *oelm1 = (*ops)[length - 2];
1983 if (oelm1->rank == 0
1984 && is_gimple_min_invariant (oelm1->op)
1985 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1986 TREE_TYPE (oelast->op)))
1988 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1989 oelm1->op, oelast->op);
1991 if (folded && is_gimple_min_invariant (folded))
1993 if (dump_file && (dump_flags & TDF_DETAILS))
1994 fprintf (dump_file, "Merging constants\n");
1996 ops->pop ();
1997 ops->pop ();
1999 add_to_ops_vec (ops, folded);
2000 reassociate_stats.constants_eliminated++;
2002 optimize_ops_list (opcode, ops);
2003 return;
2008 eliminate_using_constants (opcode, ops);
2009 oelast = NULL;
2011 for (i = 0; ops->iterate (i, &oe);)
2013 bool done = false;
2015 if (eliminate_not_pairs (opcode, ops, i, oe))
2016 return;
2017 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2018 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2019 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2021 if (done)
2022 return;
2023 iterate = true;
2024 oelast = NULL;
2025 continue;
2027 oelast = oe;
2028 i++;
2031 length = ops->length ();
2032 oelast = ops->last ();
2034 if (iterate)
2035 optimize_ops_list (opcode, ops);
2038 /* The following functions are subroutines to optimize_range_tests and allow
2039 it to try to change a logical combination of comparisons into a range
2040 test.
2042 For example, both
2043 X == 2 || X == 5 || X == 3 || X == 4
2044 and
2045 X >= 2 && X <= 5
2046 are converted to
2047 (unsigned) (X - 2) <= 3
2049 For more information see comments above fold_range_test in fold-const.c,
2050 this implementation is for GIMPLE. */
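/* A hedged illustration in source terms: for a 32-bit int X,

     if (X == 2 || X == 3 || X == 4 || X == 5)

   covers the contiguous range [2, 5], so it can be checked with a single
   unsigned comparison against the range width:

     if ((unsigned) (X - 2) <= 3U)  */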
2052 struct range_entry
2054 tree exp;
2055 tree low;
2056 tree high;
2057 bool in_p;
2058 bool strict_overflow_p;
2059 unsigned int idx, next;
2062 /* This is similar to make_range in fold-const.c, but on top of
2063 GIMPLE instead of trees. If EXP is non-NULL, it should be
2064 an SSA_NAME and STMT argument is ignored, otherwise STMT
2065 argument should be a GIMPLE_COND. */
2067 static void
2068 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2070 int in_p;
2071 tree low, high;
2072 bool is_bool, strict_overflow_p;
2074 r->exp = NULL_TREE;
2075 r->in_p = false;
2076 r->strict_overflow_p = false;
2077 r->low = NULL_TREE;
2078 r->high = NULL_TREE;
2079 if (exp != NULL_TREE
2080 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2081 return;
2083 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2084 and see if we can refine the range. Some of the cases below may not
2085 happen, but it doesn't seem worth worrying about this. We "continue"
2086 the outer loop when we've changed something; otherwise we "break"
2087 the switch, which will "break" the while. */
2088 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2089 high = low;
2090 in_p = 0;
2091 strict_overflow_p = false;
2092 is_bool = false;
2093 if (exp == NULL_TREE)
2094 is_bool = true;
2095 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2097 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2098 is_bool = true;
2099 else
2100 return;
2102 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2103 is_bool = true;
2105 while (1)
2107 enum tree_code code;
2108 tree arg0, arg1, exp_type;
2109 tree nexp;
2110 location_t loc;
2112 if (exp != NULL_TREE)
2114 if (TREE_CODE (exp) != SSA_NAME
2115 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2116 break;
2118 stmt = SSA_NAME_DEF_STMT (exp);
2119 if (!is_gimple_assign (stmt))
2120 break;
2122 code = gimple_assign_rhs_code (stmt);
2123 arg0 = gimple_assign_rhs1 (stmt);
2124 arg1 = gimple_assign_rhs2 (stmt);
2125 exp_type = TREE_TYPE (exp);
2127 else
2129 code = gimple_cond_code (stmt);
2130 arg0 = gimple_cond_lhs (stmt);
2131 arg1 = gimple_cond_rhs (stmt);
2132 exp_type = boolean_type_node;
2135 if (TREE_CODE (arg0) != SSA_NAME)
2136 break;
2137 loc = gimple_location (stmt);
2138 switch (code)
2140 case BIT_NOT_EXPR:
2141 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2142 /* Ensure the range is either +[-,0], +[0,0],
2143 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2144 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2145 or similar expression of unconditional true or
2146 false, it should not be negated. */
2147 && ((high && integer_zerop (high))
2148 || (low && integer_onep (low))))
2150 in_p = !in_p;
2151 exp = arg0;
2152 continue;
2154 break;
2155 case SSA_NAME:
2156 exp = arg0;
2157 continue;
2158 CASE_CONVERT:
2159 if (is_bool)
2160 goto do_default;
2161 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2163 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2164 is_bool = true;
2165 else
2166 return;
2168 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2169 is_bool = true;
2170 goto do_default;
2171 case EQ_EXPR:
2172 case NE_EXPR:
2173 case LT_EXPR:
2174 case LE_EXPR:
2175 case GE_EXPR:
2176 case GT_EXPR:
2177 is_bool = true;
2178 /* FALLTHRU */
2179 default:
2180 if (!is_bool)
2181 return;
2182 do_default:
2183 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2184 &low, &high, &in_p,
2185 &strict_overflow_p);
2186 if (nexp != NULL_TREE)
2188 exp = nexp;
2189 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2190 continue;
2192 break;
2194 break;
2196 if (is_bool)
2198 r->exp = exp;
2199 r->in_p = in_p;
2200 r->low = low;
2201 r->high = high;
2202 r->strict_overflow_p = strict_overflow_p;
2206 /* Comparison function for qsort. Sort entries
2207 without SSA_NAME exp first, then with SSA_NAMEs sorted
2208 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2209 by increasing ->low and if ->low is the same, by increasing
2210 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2211 maximum. */
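/* For instance (illustrative entries): an entry without an SSA_NAME exp
   sorts before any SSA_NAME entries; among entries for the same SSA_NAME
   _7, an entry with NULL low (i.e. minimum) and high 3 sorts before one
   with low 1 and high 5, and remaining ties are broken by ascending idx.  */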
2213 static int
2214 range_entry_cmp (const void *a, const void *b)
2216 const struct range_entry *p = (const struct range_entry *) a;
2217 const struct range_entry *q = (const struct range_entry *) b;
2219 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2221 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2223 /* Group range_entries for the same SSA_NAME together. */
2224 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2225 return -1;
2226 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2227 return 1;
2228 /* If ->low is different, NULL low goes first, then by
2229 ascending low. */
2230 if (p->low != NULL_TREE)
2232 if (q->low != NULL_TREE)
2234 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2235 p->low, q->low);
2236 if (tem && integer_onep (tem))
2237 return -1;
2238 tem = fold_binary (GT_EXPR, boolean_type_node,
2239 p->low, q->low);
2240 if (tem && integer_onep (tem))
2241 return 1;
2243 else
2244 return 1;
2246 else if (q->low != NULL_TREE)
2247 return -1;
2248 /* If ->high is different, NULL high goes last, before that by
2249 ascending high. */
2250 if (p->high != NULL_TREE)
2252 if (q->high != NULL_TREE)
2254 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2255 p->high, q->high);
2256 if (tem && integer_onep (tem))
2257 return -1;
2258 tem = fold_binary (GT_EXPR, boolean_type_node,
2259 p->high, q->high);
2260 if (tem && integer_onep (tem))
2261 return 1;
2263 else
2264 return -1;
2266 else if (q->high != NULL_TREE)
2267 return 1;
2268 /* If both ranges are the same, sort below by ascending idx. */
2270 else
2271 return 1;
2273 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2274 return -1;
2276 if (p->idx < q->idx)
2277 return -1;
2278 else
2280 gcc_checking_assert (p->idx > q->idx);
2281 return 1;
2285 /* Helper routine of optimize_range_tests.
2286 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2287 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2288 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2289 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2290 an array of COUNT pointers to other ranges. Return
2291 true if the range merge has been successful.
2292 If OPCODE is ERROR_MARK, this is called from within
2293 maybe_optimize_range_tests and is performing inter-bb range optimization.
2294 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2295 oe->rank. */
2297 static bool
2298 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2299 struct range_entry **otherrangep,
2300 unsigned int count, enum tree_code opcode,
2301 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2302 bool in_p, tree low, tree high, bool strict_overflow_p)
2304 operand_entry *oe = (*ops)[range->idx];
2305 tree op = oe->op;
2306 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2307 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2308 location_t loc = gimple_location (stmt);
2309 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2310 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2311 in_p, low, high);
2312 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2313 gimple_stmt_iterator gsi;
2314 unsigned int i, uid;
2316 if (tem == NULL_TREE)
2317 return false;
2319 /* If op is default def SSA_NAME, there is no place to insert the
2320 new comparison. Give up, unless we can use OP itself as the
2321 range test. */
2322 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2324 if (op == range->exp
2325 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2326 || TREE_CODE (optype) == BOOLEAN_TYPE)
2327 && (op == tem
2328 || (TREE_CODE (tem) == EQ_EXPR
2329 && TREE_OPERAND (tem, 0) == op
2330 && integer_onep (TREE_OPERAND (tem, 1))))
2331 && opcode != BIT_IOR_EXPR
2332 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2334 stmt = NULL;
2335 tem = op;
2337 else
2338 return false;
2341 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2342 warning_at (loc, OPT_Wstrict_overflow,
2343 "assuming signed overflow does not occur "
2344 "when simplifying range test");
2346 if (dump_file && (dump_flags & TDF_DETAILS))
2348 struct range_entry *r;
2349 fprintf (dump_file, "Optimizing range tests ");
2350 print_generic_expr (dump_file, range->exp);
2351 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2352 print_generic_expr (dump_file, range->low);
2353 fprintf (dump_file, ", ");
2354 print_generic_expr (dump_file, range->high);
2355 fprintf (dump_file, "]");
2356 for (i = 0; i < count; i++)
2358 if (otherrange)
2359 r = otherrange + i;
2360 else
2361 r = otherrangep[i];
2362 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2363 print_generic_expr (dump_file, r->low);
2364 fprintf (dump_file, ", ");
2365 print_generic_expr (dump_file, r->high);
2366 fprintf (dump_file, "]");
2368 fprintf (dump_file, "\n into ");
2369 print_generic_expr (dump_file, tem);
2370 fprintf (dump_file, "\n");
2373 if (opcode == BIT_IOR_EXPR
2374 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2375 tem = invert_truthvalue_loc (loc, tem);
2377 tem = fold_convert_loc (loc, optype, tem);
2378 if (stmt)
2380 gsi = gsi_for_stmt (stmt);
2381 uid = gimple_uid (stmt);
2383 else
2385 gsi = gsi_none ();
2386 uid = 0;
2388 if (stmt == NULL)
2389 gcc_checking_assert (tem == op);
2390 /* In rare cases range->exp can be equal to lhs of stmt.
2391 In that case we have to insert after the stmt rather than before
2392 it. If stmt is a PHI, insert it at the start of the basic block. */
2393 else if (op != range->exp)
2395 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2396 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2397 GSI_SAME_STMT);
2398 gsi_prev (&gsi);
2400 else if (gimple_code (stmt) != GIMPLE_PHI)
2402 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2403 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2404 GSI_CONTINUE_LINKING);
2406 else
2408 gsi = gsi_after_labels (gimple_bb (stmt));
2409 if (!gsi_end_p (gsi))
2410 uid = gimple_uid (gsi_stmt (gsi));
2411 else
2413 gsi = gsi_start_bb (gimple_bb (stmt));
2414 uid = 1;
2415 while (!gsi_end_p (gsi))
2417 uid = gimple_uid (gsi_stmt (gsi));
2418 gsi_next (&gsi);
2421 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2422 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2423 GSI_SAME_STMT);
2424 if (gsi_end_p (gsi))
2425 gsi = gsi_last_bb (gimple_bb (stmt));
2426 else
2427 gsi_prev (&gsi);
2429 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2430 if (gimple_uid (gsi_stmt (gsi)))
2431 break;
2432 else
2433 gimple_set_uid (gsi_stmt (gsi), uid);
2435 oe->op = tem;
2436 range->exp = exp;
2437 range->low = low;
2438 range->high = high;
2439 range->in_p = in_p;
2440 range->strict_overflow_p = false;
2442 for (i = 0; i < count; i++)
2444 if (otherrange)
2445 range = otherrange + i;
2446 else
2447 range = otherrangep[i];
2448 oe = (*ops)[range->idx];
2449 /* Now change all the other range test immediate uses, so that
2450 those tests will be optimized away. */
2451 if (opcode == ERROR_MARK)
2453 if (oe->op)
2454 oe->op = build_int_cst (TREE_TYPE (oe->op),
2455 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2456 else
2457 oe->op = (oe->rank == BIT_IOR_EXPR
2458 ? boolean_false_node : boolean_true_node);
2460 else
2461 oe->op = error_mark_node;
2462 range->exp = NULL_TREE;
2463 range->low = NULL_TREE;
2464 range->high = NULL_TREE;
2466 return true;
2469 /* Optimize X == CST1 || X == CST2
2470 if popcount (CST1 ^ CST2) == 1 into
2471 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2472 Similarly for ranges. E.g.
2473 X != 2 && X != 3 && X != 10 && X != 11
2474 will be transformed by the previous optimization into
2475 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2476 and this loop can transform that into
2477 !(((X & ~8) - 2U) <= 1U). */
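/* Worked instance (illustrative values): for X == 4 || X == 6 we have
   lowi == highi == 4 and lowj == highj == 6; 4 ^ 6 == 2 is a power of two
   and equals highi ^ highj, so the pair becomes (X & ~2) == 4, which
   accepts exactly 4 and 6.  */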
2479 static bool
2480 optimize_range_tests_xor (enum tree_code opcode, tree type,
2481 tree lowi, tree lowj, tree highi, tree highj,
2482 vec<operand_entry *> *ops,
2483 struct range_entry *rangei,
2484 struct range_entry *rangej)
2486 tree lowxor, highxor, tem, exp;
2487 /* Check lowi ^ lowj == highi ^ highj and
2488 popcount (lowi ^ lowj) == 1. */
2489 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2490 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2491 return false;
2492 if (!integer_pow2p (lowxor))
2493 return false;
2494 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2495 if (!tree_int_cst_equal (lowxor, highxor))
2496 return false;
2498 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2499 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2500 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2501 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2502 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2503 NULL, rangei->in_p, lowj, highj,
2504 rangei->strict_overflow_p
2505 || rangej->strict_overflow_p))
2506 return true;
2507 return false;
2510 /* Optimize X == CST1 || X == CST2
2511 if popcount (CST2 - CST1) == 1 into
2512 ((X - CST1) & ~(CST2 - CST1)) == 0.
2513 Similarly for ranges. E.g.
2514 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2515 || X == 75 || X == 45
2516 will be transformed by the previous optimization into
2517 (X - 43U) <= 3U || (X - 75U) <= 3U
2518 and this loop can transform that into
2519 ((X - 43U) & ~(75U - 43U)) <= 3U. */
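/* Worked instance (illustrative values): for X == 43 || X == 75 the two
   singleton ranges have equal width and 75 - 43 == 32 has a single bit
   set, so the pair becomes ((X - 43U) & ~32U) == 0, accepting exactly
   43 and 75.  */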
2520 static bool
2521 optimize_range_tests_diff (enum tree_code opcode, tree type,
2522 tree lowi, tree lowj, tree highi, tree highj,
2523 vec<operand_entry *> *ops,
2524 struct range_entry *rangei,
2525 struct range_entry *rangej)
2527 tree tem1, tem2, mask;
2528 /* Check highi - lowi == highj - lowj. */
2529 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2530 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2531 return false;
2532 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2533 if (!tree_int_cst_equal (tem1, tem2))
2534 return false;
2535 /* Check popcount (lowj - lowi) == 1. */
2536 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2537 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2538 return false;
2539 if (!integer_pow2p (tem1))
2540 return false;
2542 type = unsigned_type_for (type);
2543 tem1 = fold_convert (type, tem1);
2544 tem2 = fold_convert (type, tem2);
2545 lowi = fold_convert (type, lowi);
2546 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2547 tem1 = fold_binary (MINUS_EXPR, type,
2548 fold_convert (type, rangei->exp), lowi);
2549 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2550 lowj = build_int_cst (type, 0);
2551 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2552 NULL, rangei->in_p, lowj, tem2,
2553 rangei->strict_overflow_p
2554 || rangej->strict_overflow_p))
2555 return true;
2556 return false;
2559 /* Perform the checks common to optimize_range_tests_xor and
2560 optimize_range_tests_diff.
2561 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2562 Else it calls optimize_range_tests_diff. */
2564 static bool
2565 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2566 bool optimize_xor, vec<operand_entry *> *ops,
2567 struct range_entry *ranges)
2569 int i, j;
2570 bool any_changes = false;
2571 for (i = first; i < length; i++)
2573 tree lowi, highi, lowj, highj, type, tem;
2575 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2576 continue;
2577 type = TREE_TYPE (ranges[i].exp);
2578 if (!INTEGRAL_TYPE_P (type))
2579 continue;
2580 lowi = ranges[i].low;
2581 if (lowi == NULL_TREE)
2582 lowi = TYPE_MIN_VALUE (type);
2583 highi = ranges[i].high;
2584 if (highi == NULL_TREE)
2585 continue;
2586 for (j = i + 1; j < length && j < i + 64; j++)
2588 bool changes;
2589 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2590 continue;
2591 lowj = ranges[j].low;
2592 if (lowj == NULL_TREE)
2593 continue;
2594 highj = ranges[j].high;
2595 if (highj == NULL_TREE)
2596 highj = TYPE_MAX_VALUE (type);
2597 /* Check lowj > highi. */
2598 tem = fold_binary (GT_EXPR, boolean_type_node,
2599 lowj, highi);
2600 if (tem == NULL_TREE || !integer_onep (tem))
2601 continue;
2602 if (optimize_xor)
2603 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2604 highi, highj, ops,
2605 ranges + i, ranges + j);
2606 else
2607 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2608 highi, highj, ops,
2609 ranges + i, ranges + j);
2610 if (changes)
2612 any_changes = true;
2613 break;
2617 return any_changes;
2620 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2621 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2622 EXP on success, NULL otherwise. */
2624 static tree
2625 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2626 wide_int *mask, tree *totallowp)
2628 tree tem = int_const_binop (MINUS_EXPR, high, low);
2629 if (tem == NULL_TREE
2630 || TREE_CODE (tem) != INTEGER_CST
2631 || TREE_OVERFLOW (tem)
2632 || tree_int_cst_sgn (tem) == -1
2633 || compare_tree_int (tem, prec) != -1)
2634 return NULL_TREE;
2636 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2637 *mask = wi::shifted_mask (0, max, false, prec);
2638 if (TREE_CODE (exp) == BIT_AND_EXPR
2639 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2641 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2642 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2643 if (wi::popcount (msk) == 1
2644 && wi::ltu_p (msk, prec - max))
2646 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2647 max += msk.to_uhwi ();
2648 exp = TREE_OPERAND (exp, 0);
2649 if (integer_zerop (low)
2650 && TREE_CODE (exp) == PLUS_EXPR
2651 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2653 tree ret = TREE_OPERAND (exp, 0);
2654 STRIP_NOPS (ret);
2655 widest_int bias
2656 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2657 TYPE_PRECISION (TREE_TYPE (low))));
2658 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2659 if (totallowp)
2661 *totallowp = tbias;
2662 return ret;
2664 else if (!tree_int_cst_lt (totallow, tbias))
2665 return NULL_TREE;
2666 bias = wi::to_widest (tbias);
2667 bias -= wi::to_widest (totallow);
2668 if (bias >= 0 && bias < prec - max)
2670 *mask = wi::lshift (*mask, bias);
2671 return ret;
2676 if (totallowp)
2677 return exp;
2678 if (!tree_int_cst_lt (totallow, low))
2679 return exp;
2680 tem = int_const_binop (MINUS_EXPR, low, totallow);
2681 if (tem == NULL_TREE
2682 || TREE_CODE (tem) != INTEGER_CST
2683 || TREE_OVERFLOW (tem)
2684 || compare_tree_int (tem, prec - max) == 1)
2685 return NULL_TREE;
2687 *mask = wi::lshift (*mask, wi::to_widest (tem));
2688 return exp;
2691 /* Attempt to optimize small range tests using bit test.
2692 E.g.
2693 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2694 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2695 has been optimized by earlier optimizations into:
2696 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2697 As the whole 43 through 82 range spans fewer than 64 values,
2698 for 64-bit word targets optimize that into:
2699 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
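/* In the example above the values being tested against are 43, 44, 45, 46,
   49, 75, 76, 77, 78 and 82; subtracting the low bound 43 gives bit
   positions 0-3, 6, 32-35 and 39, which is how the 0x8F0000004FULL mask in
   the comment is formed.  */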
2701 static bool
2702 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2703 vec<operand_entry *> *ops,
2704 struct range_entry *ranges)
2706 int i, j;
2707 bool any_changes = false;
2708 int prec = GET_MODE_BITSIZE (word_mode);
2709 auto_vec<struct range_entry *, 64> candidates;
2711 for (i = first; i < length - 2; i++)
2713 tree lowi, highi, lowj, highj, type;
2715 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2716 continue;
2717 type = TREE_TYPE (ranges[i].exp);
2718 if (!INTEGRAL_TYPE_P (type))
2719 continue;
2720 lowi = ranges[i].low;
2721 if (lowi == NULL_TREE)
2722 lowi = TYPE_MIN_VALUE (type);
2723 highi = ranges[i].high;
2724 if (highi == NULL_TREE)
2725 continue;
2726 wide_int mask;
2727 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2728 highi, &mask, &lowi);
2729 if (exp == NULL_TREE)
2730 continue;
2731 bool strict_overflow_p = ranges[i].strict_overflow_p;
2732 candidates.truncate (0);
2733 int end = MIN (i + 64, length);
2734 for (j = i + 1; j < end; j++)
2736 tree exp2;
2737 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2738 continue;
2739 if (ranges[j].exp == exp)
2741 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2743 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2744 if (exp2 == exp)
2746 else if (TREE_CODE (exp2) == PLUS_EXPR)
2748 exp2 = TREE_OPERAND (exp2, 0);
2749 STRIP_NOPS (exp2);
2750 if (exp2 != exp)
2751 continue;
2753 else
2754 continue;
2756 else
2757 continue;
2758 lowj = ranges[j].low;
2759 if (lowj == NULL_TREE)
2760 continue;
2761 highj = ranges[j].high;
2762 if (highj == NULL_TREE)
2763 highj = TYPE_MAX_VALUE (type);
2764 wide_int mask2;
2765 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2766 highj, &mask2, NULL);
2767 if (exp2 != exp)
2768 continue;
2769 mask |= mask2;
2770 strict_overflow_p |= ranges[j].strict_overflow_p;
2771 candidates.safe_push (&ranges[j]);
2774 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2775 if (candidates.length () >= 2)
2777 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2778 wi::to_widest (lowi)
2779 + prec - 1 - wi::clz (mask));
2780 operand_entry *oe = (*ops)[ranges[i].idx];
2781 tree op = oe->op;
2782 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2783 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2784 location_t loc = gimple_location (stmt);
2785 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2787 /* See if it isn't cheaper to pretend the minimum value of the
2788 range is 0, if the maximum value is small enough.
2789 We can then avoid subtracting the minimum value, but the
2790 mask constant could perhaps be more expensive.
2791 if (compare_tree_int (lowi, 0) > 0
2792 && compare_tree_int (high, prec) < 0)
2794 int cost_diff;
2795 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2796 rtx reg = gen_raw_REG (word_mode, 10000);
2797 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2798 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2799 GEN_INT (-m)), speed_p);
2800 rtx r = immed_wide_int_const (mask, word_mode);
2801 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2802 word_mode, speed_p);
2803 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2804 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2805 word_mode, speed_p);
2806 if (cost_diff > 0)
2808 mask = wi::lshift (mask, m);
2809 lowi = build_zero_cst (TREE_TYPE (lowi));
2813 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2814 false, lowi, high);
2815 if (tem == NULL_TREE || is_gimple_val (tem))
2816 continue;
2817 tree etype = unsigned_type_for (TREE_TYPE (exp));
2818 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2819 fold_convert_loc (loc, etype, exp),
2820 fold_convert_loc (loc, etype, lowi));
2821 exp = fold_convert_loc (loc, integer_type_node, exp);
2822 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2823 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2824 build_int_cst (word_type, 1), exp);
2825 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2826 wide_int_to_tree (word_type, mask));
2827 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2828 build_zero_cst (word_type));
2829 if (is_gimple_val (exp))
2830 continue;
2832 /* The shift might have undefined behavior if TEM is true,
2833 but reassociate_bb isn't prepared to have basic blocks
2834 split when it is running. So, temporarily emit code
2835 with BIT_IOR_EXPR instead of &&, and fix it up in
2836 branch_fixup. */
2837 gimple_seq seq;
2838 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2839 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2840 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2841 gimple_seq seq2;
2842 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2843 gimple_seq_add_seq_without_update (&seq, seq2);
2844 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2845 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2846 gimple *g = gimple_build_assign (make_ssa_name (optype),
2847 BIT_IOR_EXPR, tem, exp);
2848 gimple_set_location (g, loc);
2849 gimple_seq_add_stmt_without_update (&seq, g);
2850 exp = gimple_assign_lhs (g);
2851 tree val = build_zero_cst (optype);
2852 if (update_range_test (&ranges[i], NULL, candidates.address (),
2853 candidates.length (), opcode, ops, exp,
2854 seq, false, val, val, strict_overflow_p))
2856 any_changes = true;
2857 reassoc_branch_fixups.safe_push (tem);
2859 else
2860 gimple_seq_discard (seq);
2863 return any_changes;
2866 /* Attempt to optimize for signed a and b where b is known to be >= 0:
2867 a >= 0 && a < b into (unsigned) a < (unsigned) b
2868 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
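/* A brief informal justification: when a >= 0 and b >= 0 hold in the
   signed type, casting both to unsigned preserves their relative order,
   while any negative a would wrap to a large unsigned value (e.g. at least
   0x80000000 for a 32-bit int) and so fail the unsigned comparison,
   matching a >= 0 && a < b exactly.  */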
2870 static bool
2871 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
2872 vec<operand_entry *> *ops,
2873 struct range_entry *ranges)
2875 int i;
2876 bool any_changes = false;
2877 hash_map<tree, int> *map = NULL;
2879 for (i = first; i < length; i++)
2881 if (ranges[i].exp == NULL_TREE
2882 || TREE_CODE (ranges[i].exp) != SSA_NAME
2883 || !ranges[i].in_p)
2884 continue;
2886 tree type = TREE_TYPE (ranges[i].exp);
2887 if (!INTEGRAL_TYPE_P (type)
2888 || TYPE_UNSIGNED (type)
2889 || ranges[i].low == NULL_TREE
2890 || !integer_zerop (ranges[i].low)
2891 || ranges[i].high != NULL_TREE)
2892 continue;
2893 /* EXP >= 0 here. */
2894 if (map == NULL)
2895 map = new hash_map <tree, int>;
2896 map->put (ranges[i].exp, i);
2899 if (map == NULL)
2900 return false;
2902 for (i = 0; i < length; i++)
2904 if (ranges[i].low == NULL_TREE
2905 || ranges[i].high == NULL_TREE
2906 || !integer_zerop (ranges[i].low)
2907 || !integer_zerop (ranges[i].high))
2908 continue;
2910 gimple *stmt;
2911 tree_code ccode;
2912 tree rhs1, rhs2;
2913 if (ranges[i].exp)
2915 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
2916 continue;
2917 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
2918 if (!is_gimple_assign (stmt))
2919 continue;
2920 ccode = gimple_assign_rhs_code (stmt);
2921 rhs1 = gimple_assign_rhs1 (stmt);
2922 rhs2 = gimple_assign_rhs2 (stmt);
2924 else
2926 operand_entry *oe = (*ops)[ranges[i].idx];
2927 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2928 if (gimple_code (stmt) != GIMPLE_COND)
2929 continue;
2930 ccode = gimple_cond_code (stmt);
2931 rhs1 = gimple_cond_lhs (stmt);
2932 rhs2 = gimple_cond_rhs (stmt);
2935 if (TREE_CODE (rhs1) != SSA_NAME
2936 || rhs2 == NULL_TREE
2937 || TREE_CODE (rhs2) != SSA_NAME)
2938 continue;
2940 switch (ccode)
2942 case GT_EXPR:
2943 case GE_EXPR:
2944 if (!ranges[i].in_p)
2945 std::swap (rhs1, rhs2);
2946 ccode = swap_tree_comparison (ccode);
2947 break;
2948 case LT_EXPR:
2949 case LE_EXPR:
2950 if (ranges[i].in_p)
2951 std::swap (rhs1, rhs2);
2952 break;
2953 default:
2954 continue;
2957 int *idx = map->get (rhs1);
2958 if (idx == NULL)
2959 continue;
2961 wide_int nz = get_nonzero_bits (rhs2);
2962 if (wi::neg_p (nz))
2963 continue;
2965 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
2966 and RHS2 is known to be >= 0. */
2967 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
2969 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2970 if ((ranges[*idx].strict_overflow_p
2971 || ranges[i].strict_overflow_p)
2972 && issue_strict_overflow_warning (wc))
2973 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
2974 "assuming signed overflow does not occur "
2975 "when simplifying range test");
2977 if (dump_file && (dump_flags & TDF_DETAILS))
2979 struct range_entry *r = &ranges[*idx];
2980 fprintf (dump_file, "Optimizing range test ");
2981 print_generic_expr (dump_file, r->exp);
2982 fprintf (dump_file, " +[");
2983 print_generic_expr (dump_file, r->low);
2984 fprintf (dump_file, ", ");
2985 print_generic_expr (dump_file, r->high);
2986 fprintf (dump_file, "] and comparison ");
2987 print_generic_expr (dump_file, rhs1);
2988 fprintf (dump_file, " %s ", op_symbol_code (ccode));
2989 print_generic_expr (dump_file, rhs2);
2990 fprintf (dump_file, "\n into (");
2991 print_generic_expr (dump_file, utype);
2992 fprintf (dump_file, ") ");
2993 print_generic_expr (dump_file, rhs1);
2994 fprintf (dump_file, " %s (", op_symbol_code (ccode));
2995 print_generic_expr (dump_file, utype);
2996 fprintf (dump_file, ") ");
2997 print_generic_expr (dump_file, rhs2);
2998 fprintf (dump_file, "\n");
3001 if (ranges[i].in_p)
3002 std::swap (rhs1, rhs2);
3004 unsigned int uid = gimple_uid (stmt);
3005 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3006 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3007 gimple_set_uid (g, uid);
3008 rhs1 = gimple_assign_lhs (g);
3009 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3010 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3011 gimple_set_uid (g, uid);
3012 rhs2 = gimple_assign_lhs (g);
3013 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3014 if (tree_swap_operands_p (rhs1, rhs2))
3016 std::swap (rhs1, rhs2);
3017 ccode = swap_tree_comparison (ccode);
3019 if (gimple_code (stmt) == GIMPLE_COND)
3021 gcond *c = as_a <gcond *> (stmt);
3022 gimple_cond_set_code (c, ccode);
3023 gimple_cond_set_lhs (c, rhs1);
3024 gimple_cond_set_rhs (c, rhs2);
3025 update_stmt (stmt);
3027 else
3029 operand_entry *oe = (*ops)[ranges[i].idx];
3030 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3031 if (!INTEGRAL_TYPE_P (ctype)
3032 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3033 && TYPE_PRECISION (ctype) != 1))
3034 ctype = boolean_type_node;
3035 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3036 gimple_set_uid (g, uid);
3037 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3038 if (oe->op && ctype != TREE_TYPE (oe->op))
3040 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3041 NOP_EXPR, gimple_assign_lhs (g));
3042 gimple_set_uid (g, uid);
3043 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3045 ranges[i].exp = gimple_assign_lhs (g);
3046 oe->op = ranges[i].exp;
3047 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3048 ranges[i].high = ranges[i].low;
3050 ranges[i].strict_overflow_p = false;
3051 operand_entry *oe = (*ops)[ranges[*idx].idx];
3052 /* Now change all the other range test immediate uses, so that
3053 those tests will be optimized away. */
3054 if (opcode == ERROR_MARK)
3056 if (oe->op)
3057 oe->op = build_int_cst (TREE_TYPE (oe->op),
3058 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3059 else
3060 oe->op = (oe->rank == BIT_IOR_EXPR
3061 ? boolean_false_node : boolean_true_node);
3063 else
3064 oe->op = error_mark_node;
3065 ranges[*idx].exp = NULL_TREE;
3066 ranges[*idx].low = NULL_TREE;
3067 ranges[*idx].high = NULL_TREE;
3068 any_changes = true;
3071 delete map;
3072 return any_changes;
3075 /* Optimize range tests, similarly to how fold_range_test optimizes
3076 it on trees. The tree code for the binary
3077 operation between all the operands is OPCODE.
3078 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3079 maybe_optimize_range_tests for inter-bb range optimization.
3080 In that case if oe->op is NULL, oe->id is bb->index whose
3081 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3082 the actual opcode. */
3084 static bool
3085 optimize_range_tests (enum tree_code opcode,
3086 vec<operand_entry *> *ops)
3088 unsigned int length = ops->length (), i, j, first;
3089 operand_entry *oe;
3090 struct range_entry *ranges;
3091 bool any_changes = false;
3093 if (length == 1)
3094 return false;
3096 ranges = XNEWVEC (struct range_entry, length);
3097 for (i = 0; i < length; i++)
3099 oe = (*ops)[i];
3100 ranges[i].idx = i;
3101 init_range_entry (ranges + i, oe->op,
3102 oe->op
3103 ? NULL
3104 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3105 /* For | invert it now, we will invert it again before emitting
3106 the optimized expression. */
3107 if (opcode == BIT_IOR_EXPR
3108 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3109 ranges[i].in_p = !ranges[i].in_p;
3112 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3113 for (i = 0; i < length; i++)
3114 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3115 break;
3117 /* Try to merge ranges. */
3118 for (first = i; i < length; i++)
3120 tree low = ranges[i].low;
3121 tree high = ranges[i].high;
3122 int in_p = ranges[i].in_p;
3123 bool strict_overflow_p = ranges[i].strict_overflow_p;
3124 int update_fail_count = 0;
3126 for (j = i + 1; j < length; j++)
3128 if (ranges[i].exp != ranges[j].exp)
3129 break;
3130 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3131 ranges[j].in_p, ranges[j].low, ranges[j].high))
3132 break;
3133 strict_overflow_p |= ranges[j].strict_overflow_p;
3136 if (j == i + 1)
3137 continue;
3139 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3140 opcode, ops, ranges[i].exp, NULL, in_p,
3141 low, high, strict_overflow_p))
3143 i = j - 1;
3144 any_changes = true;
3146 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3147 while update_range_test would fail. */
3148 else if (update_fail_count == 64)
3149 i = j - 1;
3150 else
3151 ++update_fail_count;
3154 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3155 ops, ranges);
3157 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3158 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3159 ops, ranges);
3160 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3161 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3162 ops, ranges);
3163 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3164 ranges);
3166 if (any_changes && opcode != ERROR_MARK)
3168 j = 0;
3169 FOR_EACH_VEC_ELT (*ops, i, oe)
3171 if (oe->op == error_mark_node)
3172 continue;
3173 else if (i != j)
3174 (*ops)[j] = oe;
3175 j++;
3177 ops->truncate (j);
3180 XDELETEVEC (ranges);
3181 return any_changes;
3184 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3185 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3186 otherwise the comparison code. */
3188 static tree_code
3189 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3191 if (TREE_CODE (var) != SSA_NAME)
3192 return ERROR_MARK;
3194 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3195 if (stmt == NULL)
3196 return ERROR_MARK;
3198 /* ??? If we start creating more COND_EXPR, we could perform
3199 this same optimization with them. For now, simplify. */
3200 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3201 return ERROR_MARK;
3203 tree cond = gimple_assign_rhs1 (stmt);
3204 tree_code cmp = TREE_CODE (cond);
3205 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3206 return ERROR_MARK;
3208 /* ??? For now, allow only canonical true and false result vectors.
3209 We could expand this to other constants should the need arise,
3210 but at the moment we don't create them. */
3211 tree t = gimple_assign_rhs2 (stmt);
3212 tree f = gimple_assign_rhs3 (stmt);
3213 bool inv;
3214 if (integer_all_onesp (t))
3215 inv = false;
3216 else if (integer_all_onesp (f))
3218 cmp = invert_tree_comparison (cmp, false);
3219 inv = true;
3221 else
3222 return ERROR_MARK;
3223 if (!integer_zerop (f))
3224 return ERROR_MARK;
3226 /* Success! */
3227 if (rets)
3228 *rets = stmt;
3229 if (reti)
3230 *reti = inv;
3231 return cmp;
3234 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3235 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3237 static bool
3238 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3240 unsigned int length = ops->length (), i, j;
3241 bool any_changes = false;
3243 if (length == 1)
3244 return false;
3246 for (i = 0; i < length; ++i)
3248 tree elt0 = (*ops)[i]->op;
3250 gassign *stmt0;
3251 bool invert;
3252 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3253 if (cmp0 == ERROR_MARK)
3254 continue;
3256 for (j = i + 1; j < length; ++j)
3258 tree &elt1 = (*ops)[j]->op;
3260 gassign *stmt1;
3261 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3262 if (cmp1 == ERROR_MARK)
3263 continue;
3265 tree cond0 = gimple_assign_rhs1 (stmt0);
3266 tree x0 = TREE_OPERAND (cond0, 0);
3267 tree y0 = TREE_OPERAND (cond0, 1);
3269 tree cond1 = gimple_assign_rhs1 (stmt1);
3270 tree x1 = TREE_OPERAND (cond1, 0);
3271 tree y1 = TREE_OPERAND (cond1, 1);
3273 tree comb;
3274 if (opcode == BIT_AND_EXPR)
3275 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3276 else if (opcode == BIT_IOR_EXPR)
3277 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3278 else
3279 gcc_unreachable ();
3280 if (comb == NULL)
3281 continue;
3283 /* Success! */
3284 if (dump_file && (dump_flags & TDF_DETAILS))
3286 fprintf (dump_file, "Transforming ");
3287 print_generic_expr (dump_file, cond0);
3288 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3289 print_generic_expr (dump_file, cond1);
3290 fprintf (dump_file, " into ");
3291 print_generic_expr (dump_file, comb);
3292 fputc ('\n', dump_file);
3295 gimple_assign_set_rhs1 (stmt0, comb);
3296 if (invert)
3297 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3298 *gimple_assign_rhs3_ptr (stmt0));
3299 update_stmt (stmt0);
3301 elt1 = error_mark_node;
3302 any_changes = true;
3306 if (any_changes)
3308 operand_entry *oe;
3309 j = 0;
3310 FOR_EACH_VEC_ELT (*ops, i, oe)
3312 if (oe->op == error_mark_node)
3313 continue;
3314 else if (i != j)
3315 (*ops)[j] = oe;
3316 j++;
3318 ops->truncate (j);
3321 return any_changes;
3324 /* Return true if STMT is a cast like:
3325 <bb N>:
3327 _123 = (int) _234;
3329 <bb M>:
3330 # _345 = PHI <_123(N), 1(...), 1(...)>
3331 where _234 has bool type, _123 has a single use and
3332 bb N has a single successor M. This is commonly used in
3333 the last block of a range test.
3335 Also return true if STMT is a tcc_comparison like:
3336 <bb N>:
3338 _234 = a_2(D) == 2;
3340 <bb M>:
3341 # _345 = PHI <_234(N), 1(...), 1(...)>
3342 _346 = (int) _345;
3343 where _234 has bool type and a single use, and
3344 bb N has a single successor M. This is commonly used in
3345 the last block of a range test. */
3347 static bool
3348 final_range_test_p (gimple *stmt)
3350 basic_block bb, rhs_bb, lhs_bb;
3351 edge e;
3352 tree lhs, rhs;
3353 use_operand_p use_p;
3354 gimple *use_stmt;
3356 if (!gimple_assign_cast_p (stmt)
3357 && (!is_gimple_assign (stmt)
3358 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3359 != tcc_comparison)))
3360 return false;
3361 bb = gimple_bb (stmt);
3362 if (!single_succ_p (bb))
3363 return false;
3364 e = single_succ_edge (bb);
3365 if (e->flags & EDGE_COMPLEX)
3366 return false;
3368 lhs = gimple_assign_lhs (stmt);
3369 rhs = gimple_assign_rhs1 (stmt);
3370 if (gimple_assign_cast_p (stmt)
3371 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3372 || TREE_CODE (rhs) != SSA_NAME
3373 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3374 return false;
3376 if (!gimple_assign_cast_p (stmt)
3377 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3378 return false;
3380 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3381 if (!single_imm_use (lhs, &use_p, &use_stmt))
3382 return false;
3384 if (gimple_code (use_stmt) != GIMPLE_PHI
3385 || gimple_bb (use_stmt) != e->dest)
3386 return false;
3388 /* And that the rhs is defined in the same loop. */
3389 if (gimple_assign_cast_p (stmt))
3391 if (TREE_CODE (rhs) != SSA_NAME
3392 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3393 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3394 return false;
3396 else
3398 if (TREE_CODE (lhs) != SSA_NAME
3399 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3400 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3401 return false;
3404 return true;
3407 /* Return true if BB is suitable basic block for inter-bb range test
3408 optimization. If BACKWARD is true, BB should be the only predecessor
3409 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3410 or compared with to find a common basic block to which all conditions
3411 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3412 be the only predecessor of BB. */
3414 static bool
3415 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3416 bool backward)
3418 edge_iterator ei, ei2;
3419 edge e, e2;
3420 gimple *stmt;
3421 gphi_iterator gsi;
3422 bool other_edge_seen = false;
3423 bool is_cond;
3425 if (test_bb == bb)
3426 return false;
3427 /* Check last stmt first. */
3428 stmt = last_stmt (bb);
3429 if (stmt == NULL
3430 || (gimple_code (stmt) != GIMPLE_COND
3431 && (backward || !final_range_test_p (stmt)))
3432 || gimple_visited_p (stmt)
3433 || stmt_could_throw_p (stmt)
3434 || *other_bb == bb)
3435 return false;
3436 is_cond = gimple_code (stmt) == GIMPLE_COND;
3437 if (is_cond)
3439 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3440 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3441 to *OTHER_BB (if not set yet, try to find it out). */
3442 if (EDGE_COUNT (bb->succs) != 2)
3443 return false;
3444 FOR_EACH_EDGE (e, ei, bb->succs)
3446 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3447 return false;
3448 if (e->dest == test_bb)
3450 if (backward)
3451 continue;
3452 else
3453 return false;
3455 if (e->dest == bb)
3456 return false;
3457 if (*other_bb == NULL)
3459 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3460 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3461 return false;
3462 else if (e->dest == e2->dest)
3463 *other_bb = e->dest;
3464 if (*other_bb == NULL)
3465 return false;
3467 if (e->dest == *other_bb)
3468 other_edge_seen = true;
3469 else if (backward)
3470 return false;
3472 if (*other_bb == NULL || !other_edge_seen)
3473 return false;
3475 else if (single_succ (bb) != *other_bb)
3476 return false;
3478 /* Now check all PHIs of *OTHER_BB. */
3479 e = find_edge (bb, *other_bb);
3480 e2 = find_edge (test_bb, *other_bb);
3481 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3483 gphi *phi = gsi.phi ();
3484 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3485 corresponding to BB and TEST_BB predecessor must be the same. */
3486 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3487 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3489 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3490 one of the PHIs should have the lhs of the last stmt in
3491 that block as PHI arg and that PHI should have 0 or 1
3492 corresponding to it in all other range test basic blocks
3493 considered. */
3494 if (!is_cond)
3496 if (gimple_phi_arg_def (phi, e->dest_idx)
3497 == gimple_assign_lhs (stmt)
3498 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3499 || integer_onep (gimple_phi_arg_def (phi,
3500 e2->dest_idx))))
3501 continue;
3503 else
3505 gimple *test_last = last_stmt (test_bb);
3506 if (gimple_code (test_last) != GIMPLE_COND
3507 && gimple_phi_arg_def (phi, e2->dest_idx)
3508 == gimple_assign_lhs (test_last)
3509 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3510 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3511 continue;
3514 return false;
3517 return true;
3520 /* Return true if BB doesn't have side-effects that would disallow
3521 range test optimization, all SSA_NAMEs set in the bb are consumed
3522 in the bb and there are no PHIs. */
3524 static bool
3525 no_side_effect_bb (basic_block bb)
3527 gimple_stmt_iterator gsi;
3528 gimple *last;
3530 if (!gimple_seq_empty_p (phi_nodes (bb)))
3531 return false;
3532 last = last_stmt (bb);
3533 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3535 gimple *stmt = gsi_stmt (gsi);
3536 tree lhs;
3537 imm_use_iterator imm_iter;
3538 use_operand_p use_p;
3540 if (is_gimple_debug (stmt))
3541 continue;
3542 if (gimple_has_side_effects (stmt))
3543 return false;
3544 if (stmt == last)
3545 return true;
3546 if (!is_gimple_assign (stmt))
3547 return false;
3548 lhs = gimple_assign_lhs (stmt);
3549 if (TREE_CODE (lhs) != SSA_NAME)
3550 return false;
3551 if (gimple_assign_rhs_could_trap_p (stmt))
3552 return false;
3553 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3555 gimple *use_stmt = USE_STMT (use_p);
3556 if (is_gimple_debug (use_stmt))
3557 continue;
3558 if (gimple_bb (use_stmt) != bb)
3559 return false;
3562 return false;
3565 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3566 return true and fill in *OPS recursively. */
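/* Illustrative only (hypothetical SSA names): given

     _5 = _1 | _2;
     _6 = _5 | _3;

   get_ops (_6, BIT_IOR_EXPR, ops, loop) pushes _1, _2 and _3 as separate
   operand entries, provided the | statements are reassociable in LOOP and
   each leaf SSA name has a single use.  */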
3568 static bool
3569 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3570 struct loop *loop)
3572 gimple *stmt = SSA_NAME_DEF_STMT (var);
3573 tree rhs[2];
3574 int i;
3576 if (!is_reassociable_op (stmt, code, loop))
3577 return false;
3579 rhs[0] = gimple_assign_rhs1 (stmt);
3580 rhs[1] = gimple_assign_rhs2 (stmt);
3581 gimple_set_visited (stmt, true);
3582 for (i = 0; i < 2; i++)
3583 if (TREE_CODE (rhs[i]) == SSA_NAME
3584 && !get_ops (rhs[i], code, ops, loop)
3585 && has_single_use (rhs[i]))
3587 operand_entry *oe = operand_entry_pool.allocate ();
3589 oe->op = rhs[i];
3590 oe->rank = code;
3591 oe->id = 0;
3592 oe->count = 1;
3593 oe->stmt_to_insert = NULL;
3594 ops->safe_push (oe);
3596 return true;
3599 /* Find the ops that were added by get_ops starting from VAR, see if
3600 they were changed during update_range_test and if yes, create new
3601 stmts. */
3603 static tree
3604 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3605 unsigned int *pidx, struct loop *loop)
3607 gimple *stmt = SSA_NAME_DEF_STMT (var);
3608 tree rhs[4];
3609 int i;
3611 if (!is_reassociable_op (stmt, code, loop))
3612 return NULL;
3614 rhs[0] = gimple_assign_rhs1 (stmt);
3615 rhs[1] = gimple_assign_rhs2 (stmt);
3616 rhs[2] = rhs[0];
3617 rhs[3] = rhs[1];
3618 for (i = 0; i < 2; i++)
3619 if (TREE_CODE (rhs[i]) == SSA_NAME)
3621 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3622 if (rhs[2 + i] == NULL_TREE)
3624 if (has_single_use (rhs[i]))
3625 rhs[2 + i] = ops[(*pidx)++]->op;
3626 else
3627 rhs[2 + i] = rhs[i];
3630 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3631 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3633 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3634 var = make_ssa_name (TREE_TYPE (var));
3635 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3636 rhs[2], rhs[3]);
3637 gimple_set_uid (g, gimple_uid (stmt));
3638 gimple_set_visited (g, true);
3639 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3641 return var;
3644 /* Structure to track the initial value passed to get_ops and
3645 the range in the ops vector for each basic block. */
3647 struct inter_bb_range_test_entry
3649 tree op;
3650 unsigned int first_idx, last_idx;
3653 /* Inter-bb range test optimization.
3655 Returns TRUE if a gimple conditional is optimized to a true/false,
3656 otherwise returns FALSE.
3658 This indicates to the caller that it should run a CFG cleanup pass
3659 once reassociation is completed. */
3661 static bool
3662 maybe_optimize_range_tests (gimple *stmt)
3664 basic_block first_bb = gimple_bb (stmt);
3665 basic_block last_bb = first_bb;
3666 basic_block other_bb = NULL;
3667 basic_block bb;
3668 edge_iterator ei;
3669 edge e;
3670 auto_vec<operand_entry *> ops;
3671 auto_vec<inter_bb_range_test_entry> bbinfo;
3672 bool any_changes = false;
3673 bool cfg_cleanup_needed = false;
3675 /* Consider only basic blocks that end with GIMPLE_COND or
3676 a cast statement satisfying final_range_test_p. All
3677 but the last bb in the first_bb .. last_bb range
3678 should end with GIMPLE_COND. */
3679 if (gimple_code (stmt) == GIMPLE_COND)
3681 if (EDGE_COUNT (first_bb->succs) != 2)
3682 return cfg_cleanup_needed;
3684 else if (final_range_test_p (stmt))
3685 other_bb = single_succ (first_bb);
3686 else
3687 return cfg_cleanup_needed;
3689 if (stmt_could_throw_p (stmt))
3690 return cfg_cleanup_needed;
3692 /* As relative ordering of post-dominator sons isn't fixed,
3693 maybe_optimize_range_tests can be called first on any
3694 bb in the range we want to optimize. So, start searching
3695 backwards, if first_bb can be set to a predecessor. */
3696 while (single_pred_p (first_bb))
3698 basic_block pred_bb = single_pred (first_bb);
3699 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3700 break;
3701 if (!no_side_effect_bb (first_bb))
3702 break;
3703 first_bb = pred_bb;
3705 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3706 Before starting forward search in last_bb successors, find
3707 out the other_bb. */
3708 if (first_bb == last_bb)
3710 other_bb = NULL;
3711 /* As non-GIMPLE_COND last stmt always terminates the range,
3712 if forward search didn't discover anything, just give up. */
3713 if (gimple_code (stmt) != GIMPLE_COND)
3714 return cfg_cleanup_needed;
3715 /* Look at both successors. Either it ends with a GIMPLE_COND
3716 and satisfies suitable_cond_bb, or ends with a cast and
3717 other_bb is that cast's successor. */
3718 FOR_EACH_EDGE (e, ei, first_bb->succs)
3719 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3720 || e->dest == first_bb)
3721 return cfg_cleanup_needed;
3722 else if (single_pred_p (e->dest))
3724 stmt = last_stmt (e->dest);
3725 if (stmt
3726 && gimple_code (stmt) == GIMPLE_COND
3727 && EDGE_COUNT (e->dest->succs) == 2)
3729 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3730 break;
3731 else
3732 other_bb = NULL;
3734 else if (stmt
3735 && final_range_test_p (stmt)
3736 && find_edge (first_bb, single_succ (e->dest)))
3738 other_bb = single_succ (e->dest);
3739 if (other_bb == first_bb)
3740 other_bb = NULL;
3743 if (other_bb == NULL)
3744 return cfg_cleanup_needed;
3746 /* Now do the forward search, moving last_bb to successor bbs
3747 that aren't other_bb. */
3748 while (EDGE_COUNT (last_bb->succs) == 2)
3750 FOR_EACH_EDGE (e, ei, last_bb->succs)
3751 if (e->dest != other_bb)
3752 break;
3753 if (e == NULL)
3754 break;
3755 if (!single_pred_p (e->dest))
3756 break;
3757 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3758 break;
3759 if (!no_side_effect_bb (e->dest))
3760 break;
3761 last_bb = e->dest;
3763 if (first_bb == last_bb)
3764 return cfg_cleanup_needed;
3765 /* Here basic blocks first_bb through last_bb's predecessor
3766 end with GIMPLE_COND, all of them have one of the edges to
3767 other_bb and another to another block in the range,
3768 all blocks except first_bb don't have side-effects and
3769 last_bb ends with either GIMPLE_COND, or cast satisfying
3770 final_range_test_p. */
3771 for (bb = last_bb; ; bb = single_pred (bb))
3773 enum tree_code code;
3774 tree lhs, rhs;
3775 inter_bb_range_test_entry bb_ent;
3777 bb_ent.op = NULL_TREE;
3778 bb_ent.first_idx = ops.length ();
3779 bb_ent.last_idx = bb_ent.first_idx;
3780 e = find_edge (bb, other_bb);
3781 stmt = last_stmt (bb);
3782 gimple_set_visited (stmt, true);
3783 if (gimple_code (stmt) != GIMPLE_COND)
3785 use_operand_p use_p;
3786 gimple *phi;
3787 edge e2;
3788 unsigned int d;
3790 lhs = gimple_assign_lhs (stmt);
3791 rhs = gimple_assign_rhs1 (stmt);
3792 gcc_assert (bb == last_bb);
3794 /* stmt is
3795 _123 = (int) _234;
3797 _234 = a_2(D) == 2;
3799 followed by:
3800 <bb M>:
3801 # _345 = PHI <_123(N), 1(...), 1(...)>
3803 or 0 instead of 1. If it is 0, the _234
3804 range test is anded together with all the
3805 other range tests; if it is 1, it is ored with
3806 them. */
3807 single_imm_use (lhs, &use_p, &phi);
3808 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3809 e2 = find_edge (first_bb, other_bb);
3810 d = e2->dest_idx;
3811 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3812 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3813 code = BIT_AND_EXPR;
3814 else
3816 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3817 code = BIT_IOR_EXPR;
3820 /* If _234 SSA_NAME_DEF_STMT is
3821 _234 = _567 | _789;
3822 (or &, corresponding to 1/0 in the phi arguments),
3823 push into ops the individual range test arguments
3824 of the bitwise or resp. and, recursively. */
3825 if (TREE_CODE (rhs) == SSA_NAME
3826 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3827 != tcc_comparison)
3828 && !get_ops (rhs, code, &ops,
3829 loop_containing_stmt (stmt))
3830 && has_single_use (rhs))
3832 /* Otherwise, push the _234 range test itself. */
3833 operand_entry *oe = operand_entry_pool.allocate ();
3835 oe->op = rhs;
3836 oe->rank = code;
3837 oe->id = 0;
3838 oe->count = 1;
3839 oe->stmt_to_insert = NULL;
3840 ops.safe_push (oe);
3841 bb_ent.last_idx++;
3842 bb_ent.op = rhs;
3844 else if (is_gimple_assign (stmt)
3845 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3846 == tcc_comparison)
3847 && !get_ops (lhs, code, &ops,
3848 loop_containing_stmt (stmt))
3849 && has_single_use (lhs))
3851 operand_entry *oe = operand_entry_pool.allocate ();
3852 oe->op = lhs;
3853 oe->rank = code;
3854 oe->id = 0;
3855 oe->count = 1;
3856 ops.safe_push (oe);
3857 bb_ent.last_idx++;
3858 bb_ent.op = lhs;
3860 else
3862 bb_ent.last_idx = ops.length ();
3863 bb_ent.op = rhs;
3865 bbinfo.safe_push (bb_ent);
3866 continue;
3868 /* Otherwise stmt is GIMPLE_COND. */
3869 code = gimple_cond_code (stmt);
3870 lhs = gimple_cond_lhs (stmt);
3871 rhs = gimple_cond_rhs (stmt);
3872 if (TREE_CODE (lhs) == SSA_NAME
3873 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3874 && ((code != EQ_EXPR && code != NE_EXPR)
3875 || rhs != boolean_false_node
3876 /* Either push into ops the individual bitwise
3877 or resp. and operands, depending on which
3878 edge is other_bb. */
3879 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3880 ^ (code == EQ_EXPR))
3881 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3882 loop_containing_stmt (stmt))))
3884 /* Or push the GIMPLE_COND stmt itself. */
3885 operand_entry *oe = operand_entry_pool.allocate ();
3887 oe->op = NULL;
3888 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3889 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3890 /* oe->op = NULL signals that there is no SSA_NAME
3891 for the range test, and oe->id instead is the
3892 basic block number at whose end the GIMPLE_COND
3893 is. */
3894 oe->id = bb->index;
3895 oe->count = 1;
3896 oe->stmt_to_insert = NULL;
3897 ops.safe_push (oe);
3898 bb_ent.op = NULL;
3899 bb_ent.last_idx++;
3901 else if (ops.length () > bb_ent.first_idx)
3903 bb_ent.op = lhs;
3904 bb_ent.last_idx = ops.length ();
3906 bbinfo.safe_push (bb_ent);
3907 if (bb == first_bb)
3908 break;
3910 if (ops.length () > 1)
3911 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3912 if (any_changes)
3914 unsigned int idx, max_idx = 0;
3915 /* update_ops relies on has_single_use predicates returning the
3916 same values as they did during get_ops earlier. Additionally it
3917 never removes statements, only adds new ones and it should walk
3918 from the single imm use and check the predicate already before
3919 making those changes.
3920 On the other hand, the handling of GIMPLE_COND directly can turn
3921 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3922 it needs to be done in a separate loop afterwards. */
3923 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3925 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3926 && bbinfo[idx].op != NULL_TREE)
3928 tree new_op;
3930 max_idx = idx;
3931 stmt = last_stmt (bb);
3932 new_op = update_ops (bbinfo[idx].op,
3933 (enum tree_code)
3934 ops[bbinfo[idx].first_idx]->rank,
3935 ops, &bbinfo[idx].first_idx,
3936 loop_containing_stmt (stmt));
3937 if (new_op == NULL_TREE)
3939 gcc_assert (bb == last_bb);
3940 new_op = ops[bbinfo[idx].first_idx++]->op;
3942 if (bbinfo[idx].op != new_op)
3944 imm_use_iterator iter;
3945 use_operand_p use_p;
3946 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3948 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3949 if (is_gimple_debug (use_stmt))
3950 continue;
3951 else if (gimple_code (use_stmt) == GIMPLE_COND
3952 || gimple_code (use_stmt) == GIMPLE_PHI)
3953 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3954 SET_USE (use_p, new_op);
3955 else if ((is_gimple_assign (use_stmt)
3956 && (TREE_CODE_CLASS
3957 (gimple_assign_rhs_code (use_stmt))
3958 == tcc_comparison)))
3959 cast_or_tcc_cmp_stmt = use_stmt;
3960 else if (gimple_assign_cast_p (use_stmt))
3961 cast_or_tcc_cmp_stmt = use_stmt;
3962 else
3963 gcc_unreachable ();
3965 if (cast_or_tcc_cmp_stmt)
3967 gcc_assert (bb == last_bb);
3968 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3969 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3970 enum tree_code rhs_code
3971 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3972 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3973 : CONVERT_EXPR;
3974 gassign *g;
3975 if (is_gimple_min_invariant (new_op))
3977 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3978 g = gimple_build_assign (new_lhs, new_op);
3980 else
3981 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3982 gimple_stmt_iterator gsi
3983 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3984 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3985 gimple_set_visited (g, true);
3986 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3987 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3988 if (is_gimple_debug (use_stmt))
3989 continue;
3990 else if (gimple_code (use_stmt) == GIMPLE_COND
3991 || gimple_code (use_stmt) == GIMPLE_PHI)
3992 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3993 SET_USE (use_p, new_lhs);
3994 else
3995 gcc_unreachable ();
3999 if (bb == first_bb)
4000 break;
4002 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4004 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4005 && bbinfo[idx].op == NULL_TREE
4006 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4008 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4010 if (idx > max_idx)
4011 max_idx = idx;
4013 /* If we collapse the conditional to a true/false
4014 condition, then bubble that knowledge up to our caller. */
4015 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4017 gimple_cond_make_false (cond_stmt);
4018 cfg_cleanup_needed = true;
4020 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4022 gimple_cond_make_true (cond_stmt);
4023 cfg_cleanup_needed = true;
4025 else
4027 gimple_cond_set_code (cond_stmt, NE_EXPR);
4028 gimple_cond_set_lhs (cond_stmt,
4029 ops[bbinfo[idx].first_idx]->op);
4030 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4032 update_stmt (cond_stmt);
4034 if (bb == first_bb)
4035 break;
4038 /* The above changes could cause basic blocks after the first
4039 modified one, up to and including last_bb, to be executed even if
4040 they would not be in the original program. If the value ranges of
4041 assignment lhs' in those bbs were dependent on the conditions
4042 guarding those basic blocks which now can change, the VRs might
4043 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4044 are only used within the same bb, it should not be a big deal if
4045 we just reset all the VRs in those bbs. See PR68671. */
4046 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4047 reset_flow_sensitive_info_in_bb (bb);
4049 return cfg_cleanup_needed;
4052 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4053 of STMT in its operands. This is also known as a "destructive
4054 update" operation. */
4056 static bool
4057 is_phi_for_stmt (gimple *stmt, tree operand)
4059 gimple *def_stmt;
4060 gphi *def_phi;
4061 tree lhs;
4062 use_operand_p arg_p;
4063 ssa_op_iter i;
4065 if (TREE_CODE (operand) != SSA_NAME)
4066 return false;
4068 lhs = gimple_assign_lhs (stmt);
4070 def_stmt = SSA_NAME_DEF_STMT (operand);
4071 def_phi = dyn_cast <gphi *> (def_stmt);
4072 if (!def_phi)
4073 return false;
4075 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4076 if (lhs == USE_FROM_PTR (arg_p))
4077 return true;
4078 return false;
4081 /* Remove def stmt of VAR if VAR has zero uses and recurse
4082 on rhs1 operand if so. */
4084 static void
4085 remove_visited_stmt_chain (tree var)
4087 gimple *stmt;
4088 gimple_stmt_iterator gsi;
4090 while (1)
4092 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4093 return;
4094 stmt = SSA_NAME_DEF_STMT (var);
4095 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4097 var = gimple_assign_rhs1 (stmt);
4098 gsi = gsi_for_stmt (stmt);
4099 reassoc_remove_stmt (&gsi);
4100 release_defs (stmt);
4102 else
4103 return;
4107 /* This function checks three consecutive operands in
4108 the passed operand vector OPS starting from OPINDEX and
4109 swaps two operands if it is profitable for the binary operation
4110 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4112 We pair ops with the same rank if possible.
4114 The alternative we try is to see if STMT is a destructive
4115 update style statement, which is like:
4116 b = phi (a, ...)
4117 a = c + b;
4118 In that case, we want to use the destructive update form to
4119 expose the possible vectorizer sum reduction opportunity.
4120 In that case, the third operand will be the phi node. This
4121 check is not performed if STMT is null.
4123 We could, of course, try to be better as noted above, and do a
4124 lot of work to try to find these opportunities in >3 operand
4125 cases, but it is unlikely to be worth it. */
4127 static void
4128 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4129 unsigned int opindex, gimple *stmt)
4131 operand_entry *oe1, *oe2, *oe3;
4133 oe1 = ops[opindex];
4134 oe2 = ops[opindex + 1];
4135 oe3 = ops[opindex + 2];
4137 if ((oe1->rank == oe2->rank
4138 && oe2->rank != oe3->rank)
4139 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4140 && !is_phi_for_stmt (stmt, oe1->op)
4141 && !is_phi_for_stmt (stmt, oe2->op)))
4142 std::swap (*oe1, *oe3);
4143 else if ((oe1->rank == oe3->rank
4144 && oe2->rank != oe3->rank)
4145 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4146 && !is_phi_for_stmt (stmt, oe1->op)
4147 && !is_phi_for_stmt (stmt, oe3->op)))
4148 std::swap (*oe1, *oe2);
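/* As a rough illustration, suppose the three trailing operand entries
   have ranks {4, 4, 6}. The first branch above swaps oe1 and oe3,
   giving rank order {6, 4, 4}, so the two equal-rank operands end up
   as the pair consumed by the innermost binary statement, e.g.

     t1 = b_4 + c_4;   <- equal-rank pair exposed to redundancy elim.
     t2 = t1 + a_6;  */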
4151 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4152 two definitions, otherwise return STMT. */
4154 static inline gimple *
4155 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4157 if (TREE_CODE (rhs1) == SSA_NAME
4158 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4159 stmt = SSA_NAME_DEF_STMT (rhs1);
4160 if (TREE_CODE (rhs2) == SSA_NAME
4161 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4162 stmt = SSA_NAME_DEF_STMT (rhs2);
4163 return stmt;
4166 /* If the stmt that defines operand has to be inserted, insert it
4167 before the use. */
4168 static void
4169 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4171 gcc_assert (is_gimple_assign (stmt_to_insert));
4172 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4173 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4174 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4175 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4176 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4178 /* If the insert point is not stmt, then insert_point would be
4179 the point where operand rhs1 or rhs2 is defined. In this case,
4180 stmt_to_insert has to be inserted afterwards. This would
4181 only happen when the stmt insertion point is flexible. */
4182 if (stmt == insert_point)
4183 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4184 else
4185 insert_stmt_after (stmt_to_insert, insert_point);
4189 /* Recursively rewrite our linearized statements so that the operators
4190 match those in OPS[OPINDEX], putting the computation in rank
4191 order. Return new lhs. */
4193 static tree
4194 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4195 vec<operand_entry *> ops, bool changed)
4197 tree rhs1 = gimple_assign_rhs1 (stmt);
4198 tree rhs2 = gimple_assign_rhs2 (stmt);
4199 tree lhs = gimple_assign_lhs (stmt);
4200 operand_entry *oe;
4202 /* The final recursion case for this function is that you have
4203 exactly two operations left.
4204 If we had exactly one op in the entire list to start with, we
4205 would have never called this function, and the tail recursion
4206 rewrites them one at a time. */
4207 if (opindex + 2 == ops.length ())
4209 operand_entry *oe1, *oe2;
4211 oe1 = ops[opindex];
4212 oe2 = ops[opindex + 1];
4214 if (rhs1 != oe1->op || rhs2 != oe2->op)
4216 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4217 unsigned int uid = gimple_uid (stmt);
4219 if (dump_file && (dump_flags & TDF_DETAILS))
4221 fprintf (dump_file, "Transforming ");
4222 print_gimple_stmt (dump_file, stmt, 0);
4225 /* If the stmt that defines operand has to be inserted, insert it
4226 before the use. */
4227 if (oe1->stmt_to_insert)
4228 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4229 if (oe2->stmt_to_insert)
4230 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4231 /* Even when changed is false, reassociation could have e.g. removed
4232 some redundant operations, so unless we are just swapping the
4233 arguments or unless there is no change at all (then we just
4234 return lhs), force creation of a new SSA_NAME. */
4235 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4237 gimple *insert_point
4238 = find_insert_point (stmt, oe1->op, oe2->op);
4239 lhs = make_ssa_name (TREE_TYPE (lhs));
4240 stmt
4241 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4242 oe1->op, oe2->op);
4243 gimple_set_uid (stmt, uid);
4244 gimple_set_visited (stmt, true);
4245 if (insert_point == gsi_stmt (gsi))
4246 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4247 else
4248 insert_stmt_after (stmt, insert_point);
4250 else
4252 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4253 == stmt);
4254 gimple_assign_set_rhs1 (stmt, oe1->op);
4255 gimple_assign_set_rhs2 (stmt, oe2->op);
4256 update_stmt (stmt);
4259 if (rhs1 != oe1->op && rhs1 != oe2->op)
4260 remove_visited_stmt_chain (rhs1);
4262 if (dump_file && (dump_flags & TDF_DETAILS))
4264 fprintf (dump_file, " into ");
4265 print_gimple_stmt (dump_file, stmt, 0);
4268 return lhs;
4271 /* If we hit here, we should have 3 or more ops left. */
4272 gcc_assert (opindex + 2 < ops.length ());
4274 /* Rewrite the next operator. */
4275 oe = ops[opindex];
4277 /* If the stmt that defines operand has to be inserted, insert it
4278 before the use. */
4279 if (oe->stmt_to_insert)
4280 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4282 /* Recurse on the LHS of the binary operator, which is guaranteed to
4283 be the non-leaf side. */
4284 tree new_rhs1
4285 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4286 changed || oe->op != rhs2);
4288 if (oe->op != rhs2 || new_rhs1 != rhs1)
4290 if (dump_file && (dump_flags & TDF_DETAILS))
4292 fprintf (dump_file, "Transforming ");
4293 print_gimple_stmt (dump_file, stmt, 0);
4296 /* If changed is false, this is either opindex == 0
4297 or all outer rhs2's were equal to corresponding oe->op,
4298 and powi_result is NULL.
4299 That means lhs is equivalent before and after reassociation.
4300 Otherwise ensure the old lhs SSA_NAME is not reused and
4301 create a new stmt as well, so that any debug stmts will be
4302 properly adjusted. */
4303 if (changed)
4305 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4306 unsigned int uid = gimple_uid (stmt);
4307 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4309 lhs = make_ssa_name (TREE_TYPE (lhs));
4310 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4311 new_rhs1, oe->op);
4312 gimple_set_uid (stmt, uid);
4313 gimple_set_visited (stmt, true);
4314 if (insert_point == gsi_stmt (gsi))
4315 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4316 else
4317 insert_stmt_after (stmt, insert_point);
4319 else
4321 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4322 == stmt);
4323 gimple_assign_set_rhs1 (stmt, new_rhs1);
4324 gimple_assign_set_rhs2 (stmt, oe->op);
4325 update_stmt (stmt);
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, " into ");
4331 print_gimple_stmt (dump_file, stmt, 0);
4334 return lhs;
4337 /* Find out how many cycles we need to compute the statement chain.
4338 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
4339 maximum number of independent statements we may execute per cycle. */
4341 static int
4342 get_required_cycles (int ops_num, int cpu_width)
4344 int res;
4345 int elog;
4346 unsigned int rest;
4348 /* While we have more than 2 * cpu_width operands
4349 we may reduce the number of operands by cpu_width
4350 per cycle. */
4351 res = ops_num / (2 * cpu_width);
4353 /* The remaining operand count may be halved each cycle
4354 until we have only one operand. */
4355 rest = (unsigned)(ops_num - res * cpu_width);
4356 elog = exact_log2 (rest);
4357 if (elog >= 0)
4358 res += elog;
4359 else
4360 res += floor_log2 (rest) + 1;
4362 return res;
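/* For instance, get_required_cycles (8, 2) evaluates to
     res  = 8 / (2 * 2) = 2,
     rest = 8 - 2 * 2   = 4,  exact_log2 (4) = 2,
   i.e. 4 cycles: two cycles of width-limited reduction followed by a
   binary reduction of the remaining four operands.  */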
4365 /* Returns an optimal number of registers to use for computation of
4366 given statements. */
4368 static int
4369 get_reassociation_width (int ops_num, enum tree_code opc,
4370 machine_mode mode)
4372 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4373 int width;
4374 int width_min;
4375 int cycles_best;
4377 if (param_width > 0)
4378 width = param_width;
4379 else
4380 width = targetm.sched.reassociation_width (opc, mode);
4382 if (width == 1)
4383 return width;
4385 /* Get the minimal time required for sequence computation. */
4386 cycles_best = get_required_cycles (ops_num, width);
4388 /* Check if we may use less width and still compute sequence for
4389 the same time. This will allow us to reduce register usage.
4390 get_required_cycles is monotonically increasing with lower width
4391 so we can perform a binary search for the minimal width that still
4392 results in the optimal cycle count. */
4393 width_min = 1;
4394 while (width > width_min)
4396 int width_mid = (width + width_min) / 2;
4398 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4399 width = width_mid;
4400 else if (width_min < width_mid)
4401 width_min = width_mid;
4402 else
4403 break;
4406 return width;
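/* For instance, with ops_num = 4 and a target-reported width of 4,
   cycles_best is get_required_cycles (4, 4) = 2; the binary search
   then finds that get_required_cycles (4, 2) is also 2 while width 1
   needs 3 cycles, so width 2 is returned, halving the number of
   parallel chains (and live registers) at no cost in latency.  */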
4409 /* Recursively rewrite our linearized statements so that the operators
4410 match those in OPS[OPINDEX], putting the computation in rank
4411 order and trying to allow operations to be executed in
4412 parallel. */
4414 static void
4415 rewrite_expr_tree_parallel (gassign *stmt, int width,
4416 vec<operand_entry *> ops)
4418 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4419 int op_num = ops.length ();
4420 gcc_assert (op_num > 0);
4421 int stmt_num = op_num - 1;
4422 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4423 int op_index = op_num - 1;
4424 int stmt_index = 0;
4425 int ready_stmts_end = 0;
4426 int i = 0;
4427 gimple *stmt1 = NULL, *stmt2 = NULL;
4428 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4430 /* We start expression rewriting from the top statements.
4431 So, in this loop we create a full list of statements
4432 we will work with. */
4433 stmts[stmt_num - 1] = stmt;
4434 for (i = stmt_num - 2; i >= 0; i--)
4435 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4437 for (i = 0; i < stmt_num; i++)
4439 tree op1, op2;
4441 /* Determine whether we should use results of
4442 already handled statements or not. */
4443 if (ready_stmts_end == 0
4444 && (i - stmt_index >= width || op_index < 1))
4445 ready_stmts_end = i;
4447 /* Now we choose operands for the next statement. A nonzero
4448 value in ready_stmts_end means that we should use
4449 the result of already generated statements as new operand. */
4450 if (ready_stmts_end > 0)
4452 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4453 if (ready_stmts_end > stmt_index)
4454 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4455 else if (op_index >= 0)
4457 operand_entry *oe = ops[op_index--];
4458 stmt2 = oe->stmt_to_insert;
4459 op2 = oe->op;
4461 else
4463 gcc_assert (stmt_index < i);
4464 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4467 if (stmt_index >= ready_stmts_end)
4468 ready_stmts_end = 0;
4470 else
4472 if (op_index > 1)
4473 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4474 operand_entry *oe2 = ops[op_index--];
4475 operand_entry *oe1 = ops[op_index--];
4476 op2 = oe2->op;
4477 stmt2 = oe2->stmt_to_insert;
4478 op1 = oe1->op;
4479 stmt1 = oe1->stmt_to_insert;
4482 /* If we emit the last statement then we should put
4483 operands into the last statement. It will also
4484 break the loop. */
4485 if (op_index < 0 && stmt_index == i)
4486 i = stmt_num - 1;
4488 if (dump_file && (dump_flags & TDF_DETAILS))
4490 fprintf (dump_file, "Transforming ");
4491 print_gimple_stmt (dump_file, stmts[i], 0);
4494 /* If the stmt that defines operand has to be inserted, insert it
4495 before the use. */
4496 if (stmt1)
4497 insert_stmt_before_use (stmts[i], stmt1);
4498 if (stmt2)
4499 insert_stmt_before_use (stmts[i], stmt2);
4500 stmt1 = stmt2 = NULL;
4502 /* We keep the original statement only for the last one. All
4503 others are recreated. */
4504 if (i == stmt_num - 1)
4506 gimple_assign_set_rhs1 (stmts[i], op1);
4507 gimple_assign_set_rhs2 (stmts[i], op2);
4508 update_stmt (stmts[i]);
4510 else
4512 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4514 if (dump_file && (dump_flags & TDF_DETAILS))
4516 fprintf (dump_file, " into ");
4517 print_gimple_stmt (dump_file, stmts[i], 0);
4521 remove_visited_stmt_chain (last_rhs1);
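/* E.g. for the chain x = a + b + c + d and width 2, the serial
   rewrite would emit ((a + b) + c) + d (three dependent additions),
   whereas the parallel form is roughly

     t1 = a + b;
     t2 = c + d;
     x  = t1 + t2;

   so the two independent additions can issue in the same cycle.  */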
4524 /* Transform STMT, which is really (A + B) + (C + D), into the left
4525 linear form, ((A+B)+C)+D.
4526 Recurse on D if necessary. */
4528 static void
4529 linearize_expr (gimple *stmt)
4531 gimple_stmt_iterator gsi;
4532 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4533 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4534 gimple *oldbinrhs = binrhs;
4535 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4536 gimple *newbinrhs = NULL;
4537 struct loop *loop = loop_containing_stmt (stmt);
4538 tree lhs = gimple_assign_lhs (stmt);
4540 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4541 && is_reassociable_op (binrhs, rhscode, loop));
4543 gsi = gsi_for_stmt (stmt);
4545 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4546 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4547 gimple_assign_rhs_code (binrhs),
4548 gimple_assign_lhs (binlhs),
4549 gimple_assign_rhs2 (binrhs));
4550 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4551 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4552 gimple_set_uid (binrhs, gimple_uid (stmt));
4554 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4555 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4557 if (dump_file && (dump_flags & TDF_DETAILS))
4559 fprintf (dump_file, "Linearized: ");
4560 print_gimple_stmt (dump_file, stmt, 0);
4563 reassociate_stats.linearized++;
4564 update_stmt (stmt);
4566 gsi = gsi_for_stmt (oldbinrhs);
4567 reassoc_remove_stmt (&gsi);
4568 release_defs (oldbinrhs);
4570 gimple_set_visited (stmt, true);
4571 gimple_set_visited (binlhs, true);
4572 gimple_set_visited (binrhs, true);
4574 /* Tail recurse on the new rhs if it still needs reassociation. */
4575 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4576 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4577 want to change the algorithm while converting to tuples. */
4578 linearize_expr (stmt);
4581 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4582 it. Otherwise, return NULL. */
4584 static gimple *
4585 get_single_immediate_use (tree lhs)
4587 use_operand_p immuse;
4588 gimple *immusestmt;
4590 if (TREE_CODE (lhs) == SSA_NAME
4591 && single_imm_use (lhs, &immuse, &immusestmt)
4592 && is_gimple_assign (immusestmt))
4593 return immusestmt;
4595 return NULL;
4598 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4599 representing the negated value. Insertions of any necessary
4600 instructions go before GSI.
4601 This function is recursive in that, if you hand it "a_5" as the
4602 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4603 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4605 static tree
4606 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4608 gimple *negatedefstmt = NULL;
4609 tree resultofnegate;
4610 gimple_stmt_iterator gsi;
4611 unsigned int uid;
4613 /* If we are trying to negate a name, defined by an add, negate the
4614 add operands instead. */
4615 if (TREE_CODE (tonegate) == SSA_NAME)
4616 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4617 if (TREE_CODE (tonegate) == SSA_NAME
4618 && is_gimple_assign (negatedefstmt)
4619 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4620 && has_single_use (gimple_assign_lhs (negatedefstmt))
4621 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4623 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4624 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4625 tree lhs = gimple_assign_lhs (negatedefstmt);
4626 gimple *g;
4628 gsi = gsi_for_stmt (negatedefstmt);
4629 rhs1 = negate_value (rhs1, &gsi);
4631 gsi = gsi_for_stmt (negatedefstmt);
4632 rhs2 = negate_value (rhs2, &gsi);
4634 gsi = gsi_for_stmt (negatedefstmt);
4635 lhs = make_ssa_name (TREE_TYPE (lhs));
4636 gimple_set_visited (negatedefstmt, true);
4637 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4638 gimple_set_uid (g, gimple_uid (negatedefstmt));
4639 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4640 return lhs;
4643 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4644 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4645 NULL_TREE, true, GSI_SAME_STMT);
4646 gsi = *gsip;
4647 uid = gimple_uid (gsi_stmt (gsi));
4648 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4650 gimple *stmt = gsi_stmt (gsi);
4651 if (gimple_uid (stmt) != 0)
4652 break;
4653 gimple_set_uid (stmt, uid);
4655 return resultofnegate;
4658 /* Return true if we should break up the subtract in STMT into an add
4659 with negate. This is true when the subtract operands are really
4660 adds, or the subtract itself is used in an add expression. In
4661 either case, breaking up the subtract into an add with negate
4662 exposes the adds to reassociation. */
4664 static bool
4665 should_break_up_subtract (gimple *stmt)
4667 tree lhs = gimple_assign_lhs (stmt);
4668 tree binlhs = gimple_assign_rhs1 (stmt);
4669 tree binrhs = gimple_assign_rhs2 (stmt);
4670 gimple *immusestmt;
4671 struct loop *loop = loop_containing_stmt (stmt);
4673 if (TREE_CODE (binlhs) == SSA_NAME
4674 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4675 return true;
4677 if (TREE_CODE (binrhs) == SSA_NAME
4678 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4679 return true;
4681 if (TREE_CODE (lhs) == SSA_NAME
4682 && (immusestmt = get_single_immediate_use (lhs))
4683 && is_gimple_assign (immusestmt)
4684 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4685 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4686 return true;
4687 return false;
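/* E.g. for

     x = a + b;
     y = x - c;
     z = y + d;

   the subtract feeds an addition (and its first operand is itself an
   add), so it is broken up into y = x + -c, exposing a, b, -c and d
   to the reassociation machinery as one chain.  */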
4690 /* Transform STMT from A - B into A + -B. */
4692 static void
4693 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4695 tree rhs1 = gimple_assign_rhs1 (stmt);
4696 tree rhs2 = gimple_assign_rhs2 (stmt);
4698 if (dump_file && (dump_flags & TDF_DETAILS))
4700 fprintf (dump_file, "Breaking up subtract ");
4701 print_gimple_stmt (dump_file, stmt, 0);
4704 rhs2 = negate_value (rhs2, gsip);
4705 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4706 update_stmt (stmt);
4709 /* Determine whether STMT is a builtin call that raises an SSA name
4710 to an integer power and has only one use. If so, and this is early
4711 reassociation and unsafe math optimizations are permitted, place
4712 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4713 If any of these conditions does not hold, return FALSE. */
4715 static bool
4716 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4718 tree arg1;
4719 REAL_VALUE_TYPE c, cint;
4721 switch (gimple_call_combined_fn (stmt))
4723 CASE_CFN_POW:
4724 if (flag_errno_math)
4725 return false;
4727 *base = gimple_call_arg (stmt, 0);
4728 arg1 = gimple_call_arg (stmt, 1);
4730 if (TREE_CODE (arg1) != REAL_CST)
4731 return false;
4733 c = TREE_REAL_CST (arg1);
4735 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4736 return false;
4738 *exponent = real_to_integer (&c);
4739 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4740 if (!real_identical (&c, &cint))
4741 return false;
4743 break;
4745 CASE_CFN_POWI:
4746 *base = gimple_call_arg (stmt, 0);
4747 arg1 = gimple_call_arg (stmt, 1);
4749 if (!tree_fits_shwi_p (arg1))
4750 return false;
4752 *exponent = tree_to_shwi (arg1);
4753 break;
4755 default:
4756 return false;
4759 /* Expanding negative exponents is generally unproductive, so we don't
4760 complicate matters with those. Exponents of zero and one should
4761 have been handled by expression folding. */
4762 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4763 return false;
4765 return true;
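/* E.g. pow (x_1, 3.0) is accepted with *BASE = x_1 and *EXPONENT = 3
   (assuming -fno-math-errno), and __builtin_powi (x_1, 5) is accepted
   with *EXPONENT = 5, while pow (x_1, 2.5) is rejected because the
   exponent is not an exact integer and pow (x_1, 1.0) is rejected
   because exponents below 2 are left to ordinary folding.  */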
4768 /* Try to derive and add operand entry for OP to *OPS. Return false if
4769 unsuccessful. */
4771 static bool
4772 try_special_add_to_ops (vec<operand_entry *> *ops,
4773 enum tree_code code,
4774 tree op, gimple* def_stmt)
4776 tree base = NULL_TREE;
4777 HOST_WIDE_INT exponent = 0;
4779 if (TREE_CODE (op) != SSA_NAME
4780 || ! has_single_use (op))
4781 return false;
4783 if (code == MULT_EXPR
4784 && reassoc_insert_powi_p
4785 && flag_unsafe_math_optimizations
4786 && is_gimple_call (def_stmt)
4787 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4789 add_repeat_to_ops_vec (ops, base, exponent);
4790 gimple_set_visited (def_stmt, true);
4791 return true;
4793 else if (code == MULT_EXPR
4794 && is_gimple_assign (def_stmt)
4795 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4796 && !HONOR_SNANS (TREE_TYPE (op))
4797 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4798 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4800 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4801 tree cst = build_minus_one_cst (TREE_TYPE (op));
4802 add_to_ops_vec (ops, rhs1);
4803 add_to_ops_vec (ops, cst);
4804 gimple_set_visited (def_stmt, true);
4805 return true;
4808 return false;
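/* E.g. when linearizing t_3 = x_1 * n_2 where n_2 = -y_4 has a single
   use, the NEGATE_EXPR branch above pushes y_4 and -1.0 into *OPS
   instead of n_2, so the negation can later be folded into the
   multiplication chain (subject to the SNaN/signed-zero checks).  */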
4811 /* Recursively linearize a binary expression that is the RHS of STMT.
4812 Place the operands of the expression tree in the vector named OPS. */
4814 static void
4815 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4816 bool is_associative, bool set_visited)
4818 tree binlhs = gimple_assign_rhs1 (stmt);
4819 tree binrhs = gimple_assign_rhs2 (stmt);
4820 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4821 bool binlhsisreassoc = false;
4822 bool binrhsisreassoc = false;
4823 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4824 struct loop *loop = loop_containing_stmt (stmt);
4826 if (set_visited)
4827 gimple_set_visited (stmt, true);
4829 if (TREE_CODE (binlhs) == SSA_NAME)
4831 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4832 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4833 && !stmt_could_throw_p (binlhsdef));
4836 if (TREE_CODE (binrhs) == SSA_NAME)
4838 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4839 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4840 && !stmt_could_throw_p (binrhsdef));
4843 /* If the LHS is not reassociable, but the RHS is, we need to swap
4844 them. If neither is reassociable, there is nothing we can do, so
4845 just put them in the ops vector. If the LHS is reassociable,
4846 linearize it. If both are reassociable, then linearize the RHS
4847 and the LHS. */
4849 if (!binlhsisreassoc)
4851 /* If this is not an associative operation, like division, give up. */
4852 if (!is_associative)
4854 add_to_ops_vec (ops, binrhs);
4855 return;
4858 if (!binrhsisreassoc)
4860 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4861 add_to_ops_vec (ops, binrhs);
4863 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4864 add_to_ops_vec (ops, binlhs);
4866 return;
4869 if (dump_file && (dump_flags & TDF_DETAILS))
4871 fprintf (dump_file, "swapping operands of ");
4872 print_gimple_stmt (dump_file, stmt, 0);
4875 swap_ssa_operands (stmt,
4876 gimple_assign_rhs1_ptr (stmt),
4877 gimple_assign_rhs2_ptr (stmt));
4878 update_stmt (stmt);
4880 if (dump_file && (dump_flags & TDF_DETAILS))
4882 fprintf (dump_file, " is now ");
4883 print_gimple_stmt (dump_file, stmt, 0);
4886 /* We want to make it so the lhs is always the reassociative op,
4887 so swap. */
4888 std::swap (binlhs, binrhs);
4890 else if (binrhsisreassoc)
4892 linearize_expr (stmt);
4893 binlhs = gimple_assign_rhs1 (stmt);
4894 binrhs = gimple_assign_rhs2 (stmt);
4897 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4898 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4899 rhscode, loop));
4900 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4901 is_associative, set_visited);
4903 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4904 add_to_ops_vec (ops, binrhs);
4907 /* Repropagate the negates back into subtracts, since no other pass
4908 currently does it. */
4910 static void
4911 repropagate_negates (void)
4913 unsigned int i = 0;
4914 tree negate;
4916 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4918 gimple *user = get_single_immediate_use (negate);
4920 if (!user || !is_gimple_assign (user))
4921 continue;
4923 /* The negate operand can be either operand of a PLUS_EXPR
4924 (it can be the LHS if the RHS is a constant for example).
4926 Force the negate operand to the RHS of the PLUS_EXPR, then
4927 transform the PLUS_EXPR into a MINUS_EXPR. */
4928 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4930 /* If the negated operand appears on the LHS of the
4931 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4932 to force the negated operand to the RHS of the PLUS_EXPR. */
4933 if (gimple_assign_rhs1 (user) == negate)
4935 swap_ssa_operands (user,
4936 gimple_assign_rhs1_ptr (user),
4937 gimple_assign_rhs2_ptr (user));
4940 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4941 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4942 if (gimple_assign_rhs2 (user) == negate)
4944 tree rhs1 = gimple_assign_rhs1 (user);
4945 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4946 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4947 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4948 update_stmt (user);
4951 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4953 if (gimple_assign_rhs1 (user) == negate)
4955 /* We have
4956 x = -a
4957 y = x - b
4958 which we transform into
4959 x = a + b
4960 y = -x .
4961 This pushes down the negate which we possibly can merge
4962 into some other operation, hence insert it into the
4963 plus_negates vector. */
4964 gimple *feed = SSA_NAME_DEF_STMT (negate);
4965 tree a = gimple_assign_rhs1 (feed);
4966 tree b = gimple_assign_rhs2 (user);
4967 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4968 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4969 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4970 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4971 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4972 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4973 user = gsi_stmt (gsi2);
4974 update_stmt (user);
4975 reassoc_remove_stmt (&gsi);
4976 release_defs (feed);
4977 plus_negates.safe_push (gimple_assign_lhs (user));
4979 else
4981 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4982 rid of one operation. */
4983 gimple *feed = SSA_NAME_DEF_STMT (negate);
4984 tree a = gimple_assign_rhs1 (feed);
4985 tree rhs1 = gimple_assign_rhs1 (user);
4986 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4987 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4988 update_stmt (gsi_stmt (gsi));
4994 /* Returns true if OP is of a type for which we can do reassociation.
4995 That is for integral or non-saturating fixed-point types, and for
4996 floating-point types when associative-math is enabled. */
4998 static bool
4999 can_reassociate_p (tree op)
5001 tree type = TREE_TYPE (op);
5002 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5003 return false;
5004 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5005 || NON_SAT_FIXED_POINT_TYPE_P (type)
5006 || (flag_associative_math && FLOAT_TYPE_P (type)))
5007 return true;
5008 return false;
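/* E.g. unsigned integer operands qualify (their overflow wraps),
   plain signed integers do not unless -fwrapv is in effect, and
   float/double operands qualify only under -fassociative-math.  */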
5011 /* Break up subtract operations in block BB.
5013 We do this top down because we don't know whether the subtract is
5014 part of a possible chain of reassociation except at the top.
5016 IE given
5017 d = f + g
5018 c = a + e
5019 b = c - d
5020 q = b - r
5021 k = t - q
5023 we want to break up k = t - q, but we won't until we've transformed q
5024 = b - r, which won't be broken up until we transform b = c - d.
5026 En passant, clear the GIMPLE visited flag on every statement
5027 and set UIDs within each basic block. */
5029 static void
5030 break_up_subtract_bb (basic_block bb)
5032 gimple_stmt_iterator gsi;
5033 basic_block son;
5034 unsigned int uid = 1;
5036 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5038 gimple *stmt = gsi_stmt (gsi);
5039 gimple_set_visited (stmt, false);
5040 gimple_set_uid (stmt, uid++);
5042 if (!is_gimple_assign (stmt)
5043 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5044 continue;
5046 /* Look for simple gimple subtract operations. */
5047 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5049 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5050 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5051 continue;
5053 /* Check for a subtract used only in an addition. If this
5054 is the case, transform it into add of a negate for better
5055 reassociation. IE transform C = A-B into C = A + -B if C
5056 is only used in an addition. */
5057 if (should_break_up_subtract (stmt))
5058 break_up_subtract (stmt, &gsi);
5060 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5061 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5062 plus_negates.safe_push (gimple_assign_lhs (stmt));
5064 for (son = first_dom_son (CDI_DOMINATORS, bb);
5065 son;
5066 son = next_dom_son (CDI_DOMINATORS, son))
5067 break_up_subtract_bb (son);
5070 /* Used for repeated factor analysis. */
5071 struct repeat_factor
5073 /* An SSA name that occurs in a multiply chain. */
5074 tree factor;
5076 /* Cached rank of the factor. */
5077 unsigned rank;
5079 /* Number of occurrences of the factor in the chain. */
5080 HOST_WIDE_INT count;
5082 /* An SSA name representing the product of this factor and
5083 all factors appearing later in the repeated factor vector. */
5084 tree repr;
5088 static vec<repeat_factor> repeat_factor_vec;
5090 /* Used for sorting the repeat factor vector. Sort primarily by
5091 ascending occurrence count, secondarily by descending rank. */
5093 static int
5094 compare_repeat_factors (const void *x1, const void *x2)
5096 const repeat_factor *rf1 = (const repeat_factor *) x1;
5097 const repeat_factor *rf2 = (const repeat_factor *) x2;
5099 if (rf1->count != rf2->count)
5100 return rf1->count - rf2->count;
5102 return rf2->rank - rf1->rank;
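/* E.g. the factors {x: count 4, rank 8}, {y: count 2, rank 9} and
   {z: count 2, rank 7} sort as y, z, x: the count-2 factors come
   first, with the higher-ranked y ahead of z.  */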
5105 /* Look for repeated operands in OPS in the multiply tree rooted at
5106 STMT. Replace them with an optimal sequence of multiplies and powi
5107 builtin calls, and remove the used operands from OPS. Return an
5108 SSA name representing the value of the replacement sequence. */
5110 static tree
5111 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5113 unsigned i, j, vec_len;
5114 int ii;
5115 operand_entry *oe;
5116 repeat_factor *rf1, *rf2;
5117 repeat_factor rfnew;
5118 tree result = NULL_TREE;
5119 tree target_ssa, iter_result;
5120 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5121 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5122 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5123 gimple *mul_stmt, *pow_stmt;
5125 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5126 target. */
5127 if (!powi_fndecl)
5128 return NULL_TREE;
5130 /* Allocate the repeated factor vector. */
5131 repeat_factor_vec.create (10);
5133 /* Scan the OPS vector for all SSA names in the product and build
5134 up a vector of occurrence counts for each factor. */
5135 FOR_EACH_VEC_ELT (*ops, i, oe)
5137 if (TREE_CODE (oe->op) == SSA_NAME)
5139 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5141 if (rf1->factor == oe->op)
5143 rf1->count += oe->count;
5144 break;
5148 if (j >= repeat_factor_vec.length ())
5150 rfnew.factor = oe->op;
5151 rfnew.rank = oe->rank;
5152 rfnew.count = oe->count;
5153 rfnew.repr = NULL_TREE;
5154 repeat_factor_vec.safe_push (rfnew);
5159 /* Sort the repeated factor vector by (a) increasing occurrence count,
5160 and (b) decreasing rank. */
5161 repeat_factor_vec.qsort (compare_repeat_factors);
5163 /* It is generally best to combine as many base factors as possible
5164 into a product before applying __builtin_powi to the result.
5165 However, the sort order chosen for the repeated factor vector
5166 allows us to cache partial results for the product of the base
5167 factors for subsequent use. When we already have a cached partial
5168 result from a previous iteration, it is best to make use of it
5169 before looking for another __builtin_powi opportunity.
5171 As an example, consider x * x * y * y * y * z * z * z * z.
5172 We want to first compose the product x * y * z, raise it to the
5173 second power, then multiply this by y * z, and finally multiply
5174 by z. This can be done in 5 multiplies provided we cache y * z
5175 for use in both expressions:
5177 t1 = y * z
5178 t2 = t1 * x
5179 t3 = t2 * t2
5180 t4 = t1 * t3
5181 result = t4 * z
5183 If we instead ignored the cached y * z and first multiplied by
5184 the __builtin_powi opportunity z * z, we would get the inferior:
5186 t1 = y * z
5187 t2 = t1 * x
5188 t3 = t2 * t2
5189 t4 = z * z
5190 t5 = t3 * t4
5191 result = t5 * y */
5193 vec_len = repeat_factor_vec.length ();
5195 /* Repeatedly look for opportunities to create a builtin_powi call. */
5196 while (true)
5198 HOST_WIDE_INT power;
5200 /* First look for the largest cached product of factors from
5201 preceding iterations. If found, create a builtin_powi for
5202 it if the minimum occurrence count for its factors is at
5203 least 2, or just use this cached product as our next
5204 multiplicand if the minimum occurrence count is 1. */
5205 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5207 if (rf1->repr && rf1->count > 0)
5208 break;
5211 if (j < vec_len)
5213 power = rf1->count;
5215 if (power == 1)
5217 iter_result = rf1->repr;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5221 unsigned elt;
5222 repeat_factor *rf;
5223 fputs ("Multiplying by cached product ", dump_file);
5224 for (elt = j; elt < vec_len; elt++)
5226 rf = &repeat_factor_vec[elt];
5227 print_generic_expr (dump_file, rf->factor);
5228 if (elt < vec_len - 1)
5229 fputs (" * ", dump_file);
5231 fputs ("\n", dump_file);
5234 else
5236 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5237 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5238 build_int_cst (integer_type_node,
5239 power));
5240 gimple_call_set_lhs (pow_stmt, iter_result);
5241 gimple_set_location (pow_stmt, gimple_location (stmt));
5242 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5243 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5245 if (dump_file && (dump_flags & TDF_DETAILS))
5247 unsigned elt;
5248 repeat_factor *rf;
5249 fputs ("Building __builtin_pow call for cached product (",
5250 dump_file);
5251 for (elt = j; elt < vec_len; elt++)
5253 rf = &repeat_factor_vec[elt];
5254 print_generic_expr (dump_file, rf->factor);
5255 if (elt < vec_len - 1)
5256 fputs (" * ", dump_file);
5258 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5259 power);
5263 else
5265 /* Otherwise, find the first factor in the repeated factor
5266 vector whose occurrence count is at least 2. If no such
5267 factor exists, there are no builtin_powi opportunities
5268 remaining. */
5269 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5271 if (rf1->count >= 2)
5272 break;
5275 if (j >= vec_len)
5276 break;
5278 power = rf1->count;
5280 if (dump_file && (dump_flags & TDF_DETAILS))
5282 unsigned elt;
5283 repeat_factor *rf;
5284 fputs ("Building __builtin_pow call for (", dump_file);
5285 for (elt = j; elt < vec_len; elt++)
5287 rf = &repeat_factor_vec[elt];
5288 print_generic_expr (dump_file, rf->factor);
5289 if (elt < vec_len - 1)
5290 fputs (" * ", dump_file);
5292 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5295 reassociate_stats.pows_created++;
5297 /* Visit each element of the vector in reverse order (so that
5298 high-occurrence elements are visited first, and within the
5299 same occurrence count, lower-ranked elements are visited
5300 first). Form a linear product of all elements in this order
5301 whose occurrence count is at least that of element J.
5302 Record the SSA name representing the product of each element
5303 with all subsequent elements in the vector. */
5304 if (j == vec_len - 1)
5305 rf1->repr = rf1->factor;
5306 else
5308 for (ii = vec_len - 2; ii >= (int)j; ii--)
5310 tree op1, op2;
5312 rf1 = &repeat_factor_vec[ii];
5313 rf2 = &repeat_factor_vec[ii + 1];
5315 /* Init the last factor's representative to be itself. */
5316 if (!rf2->repr)
5317 rf2->repr = rf2->factor;
5319 op1 = rf1->factor;
5320 op2 = rf2->repr;
5322 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5323 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5324 op1, op2);
5325 gimple_set_location (mul_stmt, gimple_location (stmt));
5326 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5327 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5328 rf1->repr = target_ssa;
5330 /* Don't reprocess the multiply we just introduced. */
5331 gimple_set_visited (mul_stmt, true);
5335 /* Form a call to __builtin_powi for the maximum product
5336 just formed, raised to the power obtained earlier. */
5337 rf1 = &repeat_factor_vec[j];
5338 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5339 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5340 build_int_cst (integer_type_node,
5341 power));
5342 gimple_call_set_lhs (pow_stmt, iter_result);
5343 gimple_set_location (pow_stmt, gimple_location (stmt));
5344 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5345 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5348 /* If we previously formed at least one other builtin_powi call,
5349 form the product of this one and those others. */
5350 if (result)
5352 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5353 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5354 result, iter_result);
5355 gimple_set_location (mul_stmt, gimple_location (stmt));
5356 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5357 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5358 gimple_set_visited (mul_stmt, true);
5359 result = new_result;
5361 else
5362 result = iter_result;
5364 /* Decrement the occurrence count of each element in the product
5365 by the count found above, and remove this many copies of each
5366 factor from OPS. */
5367 for (i = j; i < vec_len; i++)
5369 unsigned k = power;
5370 unsigned n;
5372 rf1 = &repeat_factor_vec[i];
5373 rf1->count -= power;
5375 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5377 if (oe->op == rf1->factor)
5379 if (oe->count <= k)
5381 ops->ordered_remove (n);
5382 k -= oe->count;
5384 if (k == 0)
5385 break;
5387 else
5389 oe->count -= k;
5390 break;
5397 /* At this point all elements in the repeated factor vector have a
5398 remaining occurrence count of 0 or 1, and those with a count of 1
5399 don't have cached representatives. Re-sort the ops vector and
5400 clean up. */
5401 ops->qsort (sort_by_operand_rank);
5402 repeat_factor_vec.release ();
5404 /* Return the final product computed herein. Note that there may
5405 still be some elements with single occurrence count left in OPS;
5406 those will be handled by the normal reassociation logic. */
5407 return result;
5410 /* Attempt to optimize
5411 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5412 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5414 static void
5415 attempt_builtin_copysign (vec<operand_entry *> *ops)
5417 operand_entry *oe;
5418 unsigned int i;
5419 unsigned int length = ops->length ();
5420 tree cst = ops->last ()->op;
5422 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5423 return;
5425 FOR_EACH_VEC_ELT (*ops, i, oe)
5427 if (TREE_CODE (oe->op) == SSA_NAME
5428 && has_single_use (oe->op))
5430 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5431 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5433 tree arg0, arg1;
5434 switch (gimple_call_combined_fn (old_call))
5436 CASE_CFN_COPYSIGN:
5437 arg0 = gimple_call_arg (old_call, 0);
5438 arg1 = gimple_call_arg (old_call, 1);
5439 /* The first argument of copysign must be a constant,
5440 otherwise there's nothing to do. */
5441 if (TREE_CODE (arg0) == REAL_CST)
5443 tree type = TREE_TYPE (arg0);
5444 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5445 /* If we couldn't fold to a single constant, skip it.
5446 That happens e.g. for inexact multiplication when
5447 -frounding-math. */
5448 if (mul == NULL_TREE)
5449 break;
5450 /* Instead of adjusting OLD_CALL, let's build a new
5451 call to not leak the LHS and prevent keeping bogus
5452 debug statements. DCE will clean up the old call. */
5453 gcall *new_call;
5454 if (gimple_call_internal_p (old_call))
5455 new_call = gimple_build_call_internal
5456 (IFN_COPYSIGN, 2, mul, arg1);
5457 else
5458 new_call = gimple_build_call
5459 (gimple_call_fndecl (old_call), 2, mul, arg1);
5460 tree lhs = make_ssa_name (type);
5461 gimple_call_set_lhs (new_call, lhs);
5462 gimple_set_location (new_call,
5463 gimple_location (old_call));
5464 insert_stmt_after (new_call, old_call);
5465 /* We've used the constant, get rid of it. */
5466 ops->pop ();
5467 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5468 /* Handle the CST1 < 0 case by negating the result. */
5469 if (cst1_neg)
5471 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5472 gimple *negate_stmt
5473 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5474 insert_stmt_after (negate_stmt, new_call);
5475 oe->op = negrhs;
5477 else
5478 oe->op = lhs;
5479 if (dump_file && (dump_flags & TDF_DETAILS))
5481 fprintf (dump_file, "Optimizing copysign: ");
5482 print_generic_expr (dump_file, cst);
5483 fprintf (dump_file, " * COPYSIGN (");
5484 print_generic_expr (dump_file, arg0);
5485 fprintf (dump_file, ", ");
5486 print_generic_expr (dump_file, arg1);
5487 fprintf (dump_file, ") into %sCOPYSIGN (",
5488 cst1_neg ? "-" : "");
5489 print_generic_expr (dump_file, mul);
5490 fprintf (dump_file, ", ");
5491 print_generic_expr (dump_file, arg1);
5492 fprintf (dump_file, "\n");
5494 return;
5496 break;
5497 default:
5498 break;
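/* E.g. in the product 2.0 * copysign (3.0, y_1) the constant operand
   and copysign's first argument fold to 6.0, so the operand entry is
   replaced by the result of a fresh copysign (6.0, y_1) call and the
   2.0 is popped from OPS; with -2.0 instead, the result is
   additionally negated.  */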
5505 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5507 static void
5508 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5510 tree rhs1;
5512 if (dump_file && (dump_flags & TDF_DETAILS))
5514 fprintf (dump_file, "Transforming ");
5515 print_gimple_stmt (dump_file, stmt, 0);
5518 rhs1 = gimple_assign_rhs1 (stmt);
5519 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5520 update_stmt (stmt);
5521 remove_visited_stmt_chain (rhs1);
5523 if (dump_file && (dump_flags & TDF_DETAILS))
5525 fprintf (dump_file, " into ");
5526 print_gimple_stmt (dump_file, stmt, 0);
5530 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5532 static void
5533 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5534 tree rhs1, tree rhs2)
5536 if (dump_file && (dump_flags & TDF_DETAILS))
5538 fprintf (dump_file, "Transforming ");
5539 print_gimple_stmt (dump_file, stmt, 0);
5542 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5543 update_stmt (gsi_stmt (*gsi));
5544 remove_visited_stmt_chain (rhs1);
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5548 fprintf (dump_file, " into ");
5549 print_gimple_stmt (dump_file, stmt, 0);
5553 /* Reassociate expressions in basic block BB and its children in the
5554 post-dominator tree.
5556 Bubble up return status from maybe_optimize_range_tests. */
5558 static bool
5559 reassociate_bb (basic_block bb)
5561 gimple_stmt_iterator gsi;
5562 basic_block son;
5563 gimple *stmt = last_stmt (bb);
5564 bool cfg_cleanup_needed = false;
5566 if (stmt && !gimple_visited_p (stmt))
5567 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5569 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5571 stmt = gsi_stmt (gsi);
5573 if (is_gimple_assign (stmt)
5574 && !stmt_could_throw_p (stmt))
5576 tree lhs, rhs1, rhs2;
5577 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5579 /* If this is not a gimple binary expression, there is
5580 nothing for us to do with it. */
5581 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5582 continue;
5584 /* If this was part of an already processed statement,
5585 we don't need to touch it again. */
5586 if (gimple_visited_p (stmt))
5588 /* This statement might have become dead because of previous
5589 reassociations. */
5590 if (has_zero_uses (gimple_get_lhs (stmt)))
5592 reassoc_remove_stmt (&gsi);
5593 release_defs (stmt);
5594 /* We might end up removing the last stmt above, which
5595 places the iterator at the end of the sequence.
5596 Reset it to the last stmt in that case, which might
5597 still be the end of the sequence if we removed the
5598 last statement of the sequence; in that case we
5599 need to bail out. */
5600 if (gsi_end_p (gsi))
5602 gsi = gsi_last_bb (bb);
5603 if (gsi_end_p (gsi))
5604 break;
5607 continue;
5610 lhs = gimple_assign_lhs (stmt);
5611 rhs1 = gimple_assign_rhs1 (stmt);
5612 rhs2 = gimple_assign_rhs2 (stmt);
5614 /* For non-bit or min/max operations we can't associate
5615 all types. Verify that here. */
5616 if (rhs_code != BIT_IOR_EXPR
5617 && rhs_code != BIT_AND_EXPR
5618 && rhs_code != BIT_XOR_EXPR
5619 && rhs_code != MIN_EXPR
5620 && rhs_code != MAX_EXPR
5621 && (!can_reassociate_p (lhs)
5622 || !can_reassociate_p (rhs1)
5623 || !can_reassociate_p (rhs2)))
5624 continue;
5626 if (associative_tree_code (rhs_code))
5628 auto_vec<operand_entry *> ops;
5629 tree powi_result = NULL_TREE;
5630 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5632 /* There may be no immediate uses left by the time we
5633 get here because we may have eliminated them all. */
5634 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5635 continue;
5637 gimple_set_visited (stmt, true);
5638 linearize_expr_tree (&ops, stmt, true, true);
5639 ops.qsort (sort_by_operand_rank);
5640 optimize_ops_list (rhs_code, &ops);
5641 if (undistribute_ops_list (rhs_code, &ops,
5642 loop_containing_stmt (stmt)))
5644 ops.qsort (sort_by_operand_rank);
5645 optimize_ops_list (rhs_code, &ops);
5648 if (rhs_code == PLUS_EXPR
5649 && transform_add_to_multiply (&ops))
5650 ops.qsort (sort_by_operand_rank);
5652 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5654 if (is_vector)
5655 optimize_vec_cond_expr (rhs_code, &ops);
5656 else
5657 optimize_range_tests (rhs_code, &ops);
5660 if (rhs_code == MULT_EXPR && !is_vector)
5662 attempt_builtin_copysign (&ops);
5664 if (reassoc_insert_powi_p
5665 && flag_unsafe_math_optimizations)
5666 powi_result = attempt_builtin_powi (stmt, &ops);
5669 operand_entry *last;
5670 bool negate_result = false;
5671 if (ops.length () > 1
5672 && rhs_code == MULT_EXPR)
5674 last = ops.last ();
5675 if ((integer_minus_onep (last->op)
5676 || real_minus_onep (last->op))
5677 && !HONOR_SNANS (TREE_TYPE (lhs))
5678 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5679 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5681 ops.pop ();
5682 negate_result = true;
5686 tree new_lhs = lhs;
5687 /* If the operand vector is now empty, all operands were
5688 consumed by the __builtin_powi optimization. */
5689 if (ops.length () == 0)
5690 transform_stmt_to_copy (&gsi, stmt, powi_result);
5691 else if (ops.length () == 1)
5693 tree last_op = ops.last ()->op;
5695 /* If the stmt that defines operand has to be inserted, insert it
5696 before the use. */
5697 if (ops.last ()->stmt_to_insert)
5698 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5699 if (powi_result)
5700 transform_stmt_to_multiply (&gsi, stmt, last_op,
5701 powi_result);
5702 else
5703 transform_stmt_to_copy (&gsi, stmt, last_op);
5705 else
5707 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5708 int ops_num = ops.length ();
5709 int width = get_reassociation_width (ops_num, rhs_code, mode);
5711 if (dump_file && (dump_flags & TDF_DETAILS))
5712 fprintf (dump_file,
5713 "Width = %d was chosen for reassociation\n", width);
5715 if (width > 1
5716 && ops.length () > 3)
5717 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5718 width, ops);
5719 else
5721 /* When there are three operands left, we want
5722 to make sure the ones that get the double
5723 binary op are chosen wisely. */
5724 int len = ops.length ();
5725 if (len >= 3)
5726 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5728 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5729 powi_result != NULL
5730 || negate_result);
5733 /* If we combined some repeated factors into a
5734 __builtin_powi call, multiply that result by the
5735 reassociated operands. */
5736 if (powi_result)
5738 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5739 tree type = TREE_TYPE (lhs);
5740 tree target_ssa = make_temp_ssa_name (type, NULL,
5741 "reassocpow");
5742 gimple_set_lhs (lhs_stmt, target_ssa);
5743 update_stmt (lhs_stmt);
5744 if (lhs != new_lhs)
5746 target_ssa = new_lhs;
5747 new_lhs = lhs;
5749 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5750 powi_result, target_ssa);
5751 gimple_set_location (mul_stmt, gimple_location (stmt));
5752 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5753 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5757 if (negate_result)
5759 stmt = SSA_NAME_DEF_STMT (lhs);
5760 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5761 gimple_set_lhs (stmt, tmp);
5762 if (lhs != new_lhs)
5763 tmp = new_lhs;
5764 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5765 tmp);
5766 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5767 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5768 update_stmt (stmt);
5773 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5774 son;
5775 son = next_dom_son (CDI_POST_DOMINATORS, son))
5776 cfg_cleanup_needed |= reassociate_bb (son);
5778 return cfg_cleanup_needed;
5781 /* Add jumps around shifts for range tests turned into bit tests.
5782 For each SSA_NAME VAR we have code like:
5783 VAR = ...; // final stmt of range comparison
5784 // bit test here...;
5785 OTHERVAR = ...; // final stmt of the bit test sequence
5786 RES = VAR | OTHERVAR;
5787 Turn the above into:
5788 VAR = ...;
5789 if (VAR != 0)
5790 goto <l3>;
5791 else
5792 goto <l2>;
5793 <l2>:
5794 // bit test here...;
5795 OTHERVAR = ...;
5796 <l3>:
5797 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5799 static void
5800 branch_fixup (void)
5802 tree var;
5803 unsigned int i;
5805 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5807 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5808 gimple *use_stmt;
5809 use_operand_p use;
5810 bool ok = single_imm_use (var, &use, &use_stmt);
5811 gcc_assert (ok
5812 && is_gimple_assign (use_stmt)
5813 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5814 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5816 basic_block cond_bb = gimple_bb (def_stmt);
5817 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5818 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5820 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5821 gimple *g = gimple_build_cond (NE_EXPR, var,
5822 build_zero_cst (TREE_TYPE (var)),
5823 NULL_TREE, NULL_TREE);
5824 location_t loc = gimple_location (use_stmt);
5825 gimple_set_location (g, loc);
5826 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5828 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5829 etrue->probability = REG_BR_PROB_BASE / 2;
5830 etrue->count = cond_bb->count.apply_scale (1, 2);
5831 edge efalse = find_edge (cond_bb, then_bb);
5832 efalse->flags = EDGE_FALSE_VALUE;
5833 efalse->probability -= etrue->probability;
5834 efalse->count -= etrue->count;
5835 then_bb->count -= etrue->count;
5837 tree othervar = NULL_TREE;
5838 if (gimple_assign_rhs1 (use_stmt) == var)
5839 othervar = gimple_assign_rhs2 (use_stmt);
5840 else if (gimple_assign_rhs2 (use_stmt) == var)
5841 othervar = gimple_assign_rhs1 (use_stmt);
5842 else
5843 gcc_unreachable ();
5844 tree lhs = gimple_assign_lhs (use_stmt);
5845 gphi *phi = create_phi_node (lhs, merge_bb);
5846 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5847 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5848 gsi = gsi_for_stmt (use_stmt);
5849 gsi_remove (&gsi, true);
5851 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5852 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5854 reassoc_branch_fixups.release ();
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
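  /* break_up_subtract_bb recurses down the dominator tree from the entry
     block; reassociate_bb recurses down the post-dominator tree from the
     exit block.  */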
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* A reverse post order (RPO) walk numbers the blocks so that deeper
     loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
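  /* Base ranks of consecutive blocks are spaced 1 << 16 apart so that the
     ranks assigned to definitions inside a block have room to grow without
     colliding with the base rank of a later block.  */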
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P,
   enable insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}