1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
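/* As a small illustration of steps 1 and 5 above (a sketch with made-up
   names, not taken from an actual dump): a statement like

     x = a - b

   is broken up into

     t = -b
     x = a + t

   so that t can take part in an addition chain together with other
   summands.  After the chains have been rewritten, step 5 turns any
   surviving a + (-b) forms back into subtractions.  */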
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs on which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block is
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 /* It's nicer for optimize_expression if constants that are likely
499 to fold when added/multiplied/whatever are put next to each
500 other. Since all constants have rank 0, order them by type. */
501 if (oeb->rank == 0 && oea->rank == 0)
503 if (constant_type (oeb->op) != constant_type (oea->op))
504 return constant_type (oeb->op) - constant_type (oea->op);
505 else
506 /* To make sorting result stable, we use unique IDs to determine
507 order. */
508 return oeb->id > oea->id ? 1 : -1;
511 /* Lastly, make sure the versions that are the same go next to each
512 other. */
513 if (oeb->rank == oea->rank
514 && TREE_CODE (oea->op) == SSA_NAME
515 && TREE_CODE (oeb->op) == SSA_NAME)
517 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
518 versions of removed SSA_NAMEs, if possible prefer to sort
519 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 See PR60418. */
521 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
522 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
523 && !oea->stmt_to_insert
524 && !oeb->stmt_to_insert
525 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
527 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
528 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
529 basic_block bba = gimple_bb (stmta);
530 basic_block bbb = gimple_bb (stmtb);
531 if (bbb != bba)
533 if (bb_rank[bbb->index] != bb_rank[bba->index])
534 return bb_rank[bbb->index] - bb_rank[bba->index];
536 else
538 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
539 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
540 if (da != db)
541 return da ? 1 : -1;
545 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
546 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
547 else
548 return oeb->id > oea->id ? 1 : -1;
551 if (oeb->rank != oea->rank)
552 return oeb->rank > oea->rank ? 1 : -1;
553 else
554 return oeb->id > oea->id ? 1 : -1;
557 /* Add an operand entry to *OPS for the tree operand OP. */
559 static void
560 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
562 operand_entry *oe = operand_entry_pool.allocate ();
564 oe->op = op;
565 oe->rank = get_rank (op);
566 oe->id = next_operand_entry_id++;
567 oe->count = 1;
568 oe->stmt_to_insert = stmt_to_insert;
569 ops->safe_push (oe);
572 /* Add an operand entry to *OPS for the tree operand OP with repeat
573 count REPEAT. */
575 static void
576 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
577 HOST_WIDE_INT repeat)
579 operand_entry *oe = operand_entry_pool.allocate ();
581 oe->op = op;
582 oe->rank = get_rank (op);
583 oe->id = next_operand_entry_id++;
584 oe->count = repeat;
585 oe->stmt_to_insert = NULL;
586 ops->safe_push (oe);
588 reassociate_stats.pows_encountered++;
591 /* Return true if STMT is a reassociable operation containing a binary
592 operation with tree code CODE, and is inside LOOP. */
594 static bool
595 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
597 basic_block bb = gimple_bb (stmt);
599 if (gimple_bb (stmt) == NULL)
600 return false;
602 if (!flow_bb_inside_loop_p (loop, bb))
603 return false;
605 if (is_gimple_assign (stmt)
606 && gimple_assign_rhs_code (stmt) == code
607 && has_single_use (gimple_assign_lhs (stmt)))
609 tree rhs1 = gimple_assign_rhs1 (stmt);
610 tree rhs2 = gimple_assign_rhs2 (stmt);
611 if (TREE_CODE (rhs1) == SSA_NAME
612 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
613 return false;
614 if (rhs2
615 && TREE_CODE (rhs2) == SSA_NAME
616 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
617 return false;
618 return true;
621 return false;
625 /* Return true if STMT is a nop-conversion. */
627 static bool
628 gimple_nop_conversion_p (gimple *stmt)
630 if (gassign *ass = dyn_cast <gassign *> (stmt))
632 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
633 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
634 TREE_TYPE (gimple_assign_rhs1 (ass))))
635 return true;
637 return false;
640 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
641 operand of that operation. Otherwise, return NULL. */
643 static tree
644 get_unary_op (tree name, enum tree_code opcode)
646 gimple *stmt = SSA_NAME_DEF_STMT (name);
648 /* Look through nop conversions (sign changes). */
649 if (gimple_nop_conversion_p (stmt)
650 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
651 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
653 if (!is_gimple_assign (stmt))
654 return NULL_TREE;
656 if (gimple_assign_rhs_code (stmt) == opcode)
657 return gimple_assign_rhs1 (stmt);
658 return NULL_TREE;
661 /* Return true if OP1 and OP2 have the same value when cast to either type. */
663 static bool
664 ops_equal_values_p (tree op1, tree op2)
666 if (op1 == op2)
667 return true;
669 tree orig_op1 = op1;
670 if (TREE_CODE (op1) == SSA_NAME)
672 gimple *stmt = SSA_NAME_DEF_STMT (op1);
673 if (gimple_nop_conversion_p (stmt))
675 op1 = gimple_assign_rhs1 (stmt);
676 if (op1 == op2)
677 return true;
681 if (TREE_CODE (op2) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op2);
684 if (gimple_nop_conversion_p (stmt))
686 op2 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2
688 || orig_op1 == op2)
689 return true;
693 return false;
697 /* If CURR and LAST are a pair of ops that OPCODE allows us to
698 eliminate through equivalences, do so, remove them from OPS, and
699 return true. Otherwise, return false. */
701 static bool
702 eliminate_duplicate_pair (enum tree_code opcode,
703 vec<operand_entry *> *ops,
704 bool *all_done,
705 unsigned int i,
706 operand_entry *curr,
707 operand_entry *last)
710 /* If we have two of the same op, and the opcode is &, |, min, or max,
711 we can eliminate one of them.
712 If we have two of the same op, and the opcode is ^, we can
713 eliminate both of them. */
715 if (last && last->op == curr->op)
717 switch (opcode)
719 case MAX_EXPR:
720 case MIN_EXPR:
721 case BIT_IOR_EXPR:
722 case BIT_AND_EXPR:
723 if (dump_file && (dump_flags & TDF_DETAILS))
725 fprintf (dump_file, "Equivalence: ");
726 print_generic_expr (dump_file, curr->op);
727 fprintf (dump_file, " [&|minmax] ");
728 print_generic_expr (dump_file, last->op);
729 fprintf (dump_file, " -> ");
730 print_generic_stmt (dump_file, last->op);
733 ops->ordered_remove (i);
734 reassociate_stats.ops_eliminated ++;
736 return true;
738 case BIT_XOR_EXPR:
739 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "Equivalence: ");
742 print_generic_expr (dump_file, curr->op);
743 fprintf (dump_file, " ^ ");
744 print_generic_expr (dump_file, last->op);
745 fprintf (dump_file, " -> nothing\n");
748 reassociate_stats.ops_eliminated += 2;
750 if (ops->length () == 2)
752 ops->truncate (0);
753 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
754 *all_done = true;
756 else
758 ops->ordered_remove (i-1);
759 ops->ordered_remove (i-1);
762 return true;
764 default:
765 break;
768 return false;
771 static vec<tree> plus_negates;
773 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
774 expression, look in OPS for a corresponding positive operation to cancel
775 it out. If we find one, remove the other from OPS, replace
776 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
777 return false. */
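/* For example (a sketch with made-up SSA names): in the PLUS_EXPR chain
   a_1 + t_2 + b_3 where t_2 = -b_3, the pair t_2/b_3 cancels and is
   replaced by 0; where t_2 = ~b_3, the pair folds to -1 instead, since
   x + ~x == -1 for a wrapping integer type.  */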
779 static bool
780 eliminate_plus_minus_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 unsigned int currindex,
783 operand_entry *curr)
785 tree negateop;
786 tree notop;
787 unsigned int i;
788 operand_entry *oe;
790 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
791 return false;
793 negateop = get_unary_op (curr->op, NEGATE_EXPR);
794 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
795 if (negateop == NULL_TREE && notop == NULL_TREE)
796 return false;
798 /* Any non-negated version will have a rank that is one less than
799 the current rank. So once we hit those ranks, if we don't find
800 one, we can stop. */
802 for (i = currindex + 1;
803 ops->iterate (i, &oe)
804 && oe->rank >= curr->rank - 1 ;
805 i++)
807 if (negateop
808 && ops_equal_values_p (oe->op, negateop))
810 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Equivalence: ");
813 print_generic_expr (dump_file, negateop);
814 fprintf (dump_file, " + -");
815 print_generic_expr (dump_file, oe->op);
816 fprintf (dump_file, " -> 0\n");
819 ops->ordered_remove (i);
820 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
821 ops->ordered_remove (currindex);
822 reassociate_stats.ops_eliminated ++;
824 return true;
826 else if (notop
827 && ops_equal_values_p (oe->op, notop))
829 tree op_type = TREE_TYPE (oe->op);
831 if (dump_file && (dump_flags & TDF_DETAILS))
833 fprintf (dump_file, "Equivalence: ");
834 print_generic_expr (dump_file, notop);
835 fprintf (dump_file, " + ~");
836 print_generic_expr (dump_file, oe->op);
837 fprintf (dump_file, " -> -1\n");
840 ops->ordered_remove (i);
841 add_to_ops_vec (ops, build_all_ones_cst (op_type));
842 ops->ordered_remove (currindex);
843 reassociate_stats.ops_eliminated ++;
845 return true;
849 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
850 save it for later inspection in repropagate_negates(). */
851 if (negateop != NULL_TREE
852 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
853 plus_negates.safe_push (curr->op);
855 return false;
858 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
859 bitwise not expression, look in OPS for a corresponding operand to
860 cancel it out. If we find one, remove the other from OPS, replace
861 OPS[CURRINDEX] with 0, and return true. Otherwise, return
862 false. */
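/* For example (a sketch with made-up SSA names): a_1 & t_2 & b_3 with
   t_2 = ~b_3 collapses to 0, and a_1 | t_2 | b_3 with t_2 = ~b_3
   collapses to -1; the whole operand list is replaced by that single
   constant.  */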
864 static bool
865 eliminate_not_pairs (enum tree_code opcode,
866 vec<operand_entry *> *ops,
867 unsigned int currindex,
868 operand_entry *curr)
870 tree notop;
871 unsigned int i;
872 operand_entry *oe;
874 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
875 || TREE_CODE (curr->op) != SSA_NAME)
876 return false;
878 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
879 if (notop == NULL_TREE)
880 return false;
882 /* Any non-not version will have a rank that is one less than
883 the current rank. So once we hit those ranks, if we don't find
884 one, we can stop. */
886 for (i = currindex + 1;
887 ops->iterate (i, &oe)
888 && oe->rank >= curr->rank - 1;
889 i++)
891 if (oe->op == notop)
893 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Equivalence: ");
896 print_generic_expr (dump_file, notop);
897 if (opcode == BIT_AND_EXPR)
898 fprintf (dump_file, " & ~");
899 else if (opcode == BIT_IOR_EXPR)
900 fprintf (dump_file, " | ~");
901 print_generic_expr (dump_file, oe->op);
902 if (opcode == BIT_AND_EXPR)
903 fprintf (dump_file, " -> 0\n");
904 else if (opcode == BIT_IOR_EXPR)
905 fprintf (dump_file, " -> -1\n");
908 if (opcode == BIT_AND_EXPR)
909 oe->op = build_zero_cst (TREE_TYPE (oe->op));
910 else if (opcode == BIT_IOR_EXPR)
911 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
913 reassociate_stats.ops_eliminated += ops->length () - 1;
914 ops->truncate (0);
915 ops->quick_push (oe);
916 return true;
920 return false;
923 /* Use constant value that may be present in OPS to try to eliminate
924 operands. Note that this function is only really used when we've
925 eliminated ops for other reasons, or merged constants. Across
926 single statements, fold already does all of this, plus more. There
927 is little point in duplicating logic, so I've only included the
928 identities that I could ever construct testcases to trigger. */
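/* For example (a sketch): a trailing constant 0 in a BIT_AND_EXPR list
   collapses the whole list to 0 while a trailing -1 is simply dropped;
   for BIT_IOR_EXPR the roles are swapped; for MULT_EXPR a 0 collapses
   the list (when the FP semantics allow it) and a 1 is dropped; and a
   0 summand or xor operand is dropped.  */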
930 static void
931 eliminate_using_constants (enum tree_code opcode,
932 vec<operand_entry *> *ops)
934 operand_entry *oelast = ops->last ();
935 tree type = TREE_TYPE (oelast->op);
937 if (oelast->rank == 0
938 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
940 switch (opcode)
942 case BIT_AND_EXPR:
943 if (integer_zerop (oelast->op))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found & 0, removing all other ops\n");
950 reassociate_stats.ops_eliminated += ops->length () - 1;
952 ops->truncate (0);
953 ops->quick_push (oelast);
954 return;
957 else if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found & -1, removing\n");
963 ops->pop ();
964 reassociate_stats.ops_eliminated++;
967 break;
968 case BIT_IOR_EXPR:
969 if (integer_all_onesp (oelast->op))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found | -1, removing all other ops\n");
976 reassociate_stats.ops_eliminated += ops->length () - 1;
978 ops->truncate (0);
979 ops->quick_push (oelast);
980 return;
983 else if (integer_zerop (oelast->op))
985 if (ops->length () != 1)
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found | 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
993 break;
994 case MULT_EXPR:
995 if (integer_zerop (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_NANS (type)
998 && !HONOR_SIGNED_ZEROS (type)
999 && real_zerop (oelast->op)))
1001 if (ops->length () != 1)
1003 if (dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file, "Found * 0, removing all other ops\n");
1006 reassociate_stats.ops_eliminated += ops->length () - 1;
1007 ops->truncate (1);
1008 ops->quick_push (oelast);
1009 return;
1012 else if (integer_onep (oelast->op)
1013 || (FLOAT_TYPE_P (type)
1014 && !HONOR_SNANS (type)
1015 && real_onep (oelast->op)))
1017 if (ops->length () != 1)
1019 if (dump_file && (dump_flags & TDF_DETAILS))
1020 fprintf (dump_file, "Found * 1, removing\n");
1021 ops->pop ();
1022 reassociate_stats.ops_eliminated++;
1023 return;
1026 break;
1027 case BIT_XOR_EXPR:
1028 case PLUS_EXPR:
1029 case MINUS_EXPR:
1030 if (integer_zerop (oelast->op)
1031 || (FLOAT_TYPE_P (type)
1032 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1033 && fold_real_zero_addition_p (type, oelast->op,
1034 opcode == MINUS_EXPR)))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found [|^+] 0, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1042 return;
1045 break;
1046 default:
1047 break;
1053 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1054 bool, bool);
1056 /* Structure for tracking and counting operands. */
1057 struct oecount {
1058 unsigned int cnt;
1059 unsigned int id;
1060 enum tree_code oecode;
1061 tree op;
1065 /* The heap for the oecount hashtable and the sorted list of operands. */
1066 static vec<oecount> cvec;
1069 /* Oecount hashtable helpers. */
1071 struct oecount_hasher : int_hash <int, 0, 1>
1073 static inline hashval_t hash (int);
1074 static inline bool equal (int, int);
1077 /* Hash function for oecount. */
1079 inline hashval_t
1080 oecount_hasher::hash (int p)
1082 const oecount *c = &cvec[p - 42];
1083 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1086 /* Comparison function for oecount. */
1088 inline bool
1089 oecount_hasher::equal (int p1, int p2)
1091 const oecount *c1 = &cvec[p1 - 42];
1092 const oecount *c2 = &cvec[p2 - 42];
1093 return c1->oecode == c2->oecode && c1->op == c2->op;
1096 /* Comparison function for qsort sorting oecount elements by count. */
1098 static int
1099 oecount_cmp (const void *p1, const void *p2)
1101 const oecount *c1 = (const oecount *)p1;
1102 const oecount *c2 = (const oecount *)p2;
1103 if (c1->cnt != c2->cnt)
1104 return c1->cnt > c2->cnt ? 1 : -1;
1105 else
1106 /* If counts are identical, use unique IDs to stabilize qsort. */
1107 return c1->id > c2->id ? 1 : -1;
1110 /* Return TRUE iff STMT represents a builtin call that raises OP
1111 to some exponent. */
1113 static bool
1114 stmt_is_power_of_op (gimple *stmt, tree op)
1116 if (!is_gimple_call (stmt))
1117 return false;
1119 switch (gimple_call_combined_fn (stmt))
1121 CASE_CFN_POW:
1122 CASE_CFN_POWI:
1123 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1125 default:
1126 return false;
1130 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1131 in place and return the result. Assumes that stmt_is_power_of_op
1132 was previously called for STMT and returned TRUE. */
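/* For example (a sketch): for a call __builtin_powi (x, 4) the exponent
   argument is rewritten in place to 3 and 3 is returned; likewise
   __builtin_pow (x, 4.0) becomes __builtin_pow (x, 3.0).  */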
1134 static HOST_WIDE_INT
1135 decrement_power (gimple *stmt)
1137 REAL_VALUE_TYPE c, cint;
1138 HOST_WIDE_INT power;
1139 tree arg1;
1141 switch (gimple_call_combined_fn (stmt))
1143 CASE_CFN_POW:
1144 arg1 = gimple_call_arg (stmt, 1);
1145 c = TREE_REAL_CST (arg1);
1146 power = real_to_integer (&c) - 1;
1147 real_from_integer (&cint, VOIDmode, power, SIGNED);
1148 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1149 return power;
1151 CASE_CFN_POWI:
1152 arg1 = gimple_call_arg (stmt, 1);
1153 power = TREE_INT_CST_LOW (arg1) - 1;
1154 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1155 return power;
1157 default:
1158 gcc_unreachable ();
1162 /* Give STMT a new SSA name as its LHS, replace all uses of the old
1163 LHS with the new name, and return the new SSA name. */
1165 static tree
1166 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1168 gimple *use_stmt;
1169 use_operand_p use;
1170 imm_use_iterator iter;
1171 tree new_lhs, new_debug_lhs = NULL_TREE;
1172 tree lhs = gimple_get_lhs (stmt);
1174 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1175 gimple_set_lhs (stmt, new_lhs);
1177 /* Also need to update GIMPLE_DEBUGs. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1180 tree repl = new_lhs;
1181 if (is_gimple_debug (use_stmt))
1183 if (new_debug_lhs == NULL_TREE)
1185 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1186 gdebug *def_temp
1187 = gimple_build_debug_bind (new_debug_lhs,
1188 build2 (opcode, TREE_TYPE (lhs),
1189 new_lhs, op),
1190 stmt);
1191 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1192 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1193 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1194 gimple_set_uid (def_temp, gimple_uid (stmt));
1195 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1196 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1198 repl = new_debug_lhs;
1200 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1201 SET_USE (use, repl);
1202 update_stmt (use_stmt);
1204 return new_lhs;
1207 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1208 uses with new SSAs. Also do this for the stmt that defines DEF
1209 if *DEF is not OP. */
1211 static void
1212 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1213 vec<gimple *> &stmts_to_fix)
1215 unsigned i;
1216 gimple *stmt;
1218 if (*def != op
1219 && TREE_CODE (*def) == SSA_NAME
1220 && (stmt = SSA_NAME_DEF_STMT (*def))
1221 && gimple_code (stmt) != GIMPLE_NOP)
1222 *def = make_new_ssa_for_def (stmt, opcode, op);
1224 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1225 make_new_ssa_for_def (stmt, opcode, op);
1228 /* Find the single immediate use of STMT's LHS, and replace it
1229 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1230 replace *DEF with OP as well. */
1232 static void
1233 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1235 tree lhs;
1236 gimple *use_stmt;
1237 use_operand_p use;
1238 gimple_stmt_iterator gsi;
1240 if (is_gimple_call (stmt))
1241 lhs = gimple_call_lhs (stmt);
1242 else
1243 lhs = gimple_assign_lhs (stmt);
1245 gcc_assert (has_single_use (lhs));
1246 single_imm_use (lhs, &use, &use_stmt);
1247 if (lhs == *def)
1248 *def = op;
1249 SET_USE (use, op);
1250 if (TREE_CODE (op) != SSA_NAME)
1251 update_stmt (use_stmt);
1252 gsi = gsi_for_stmt (stmt);
1253 unlink_stmt_vdef (stmt);
1254 reassoc_remove_stmt (&gsi);
1255 release_defs (stmt);
1258 /* Walks the linear chain with result *DEF searching for an operation
1259 with operand OP and code OPCODE removing that from the chain. *DEF
1260 is updated if there is only one operand but no operation left. */
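/* For example (a sketch with made-up SSA names): with OPCODE == MULT_EXPR
   and OP == x_1, a chain

     t_2 = x_1 * a_3;
     t_4 = t_2 * b_5;    <-- *DEF

   is rewritten so that it computes a_3 * b_5, i.e. exactly one factor
   of x_1 is removed; the statements involved get fresh SSA names (see
   the PR72835 note in the function body).  */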
1262 static void
1263 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1265 tree orig_def = *def;
1266 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1267 /* PR72835 - Record the stmt chain that has to be updated such that
1268 we don't use the same LHS when the values computed are different. */
1269 auto_vec<gimple *, 64> stmts_to_fix;
1273 tree name;
1275 if (opcode == MULT_EXPR)
1277 if (stmt_is_power_of_op (stmt, op))
1279 if (decrement_power (stmt) == 1)
1281 if (stmts_to_fix.length () > 0)
1282 stmts_to_fix.pop ();
1283 propagate_op_to_single_use (op, stmt, def);
1285 break;
1287 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1289 if (gimple_assign_rhs1 (stmt) == op)
1291 tree cst = build_minus_one_cst (TREE_TYPE (op));
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (cst, stmt, def);
1295 break;
1297 else if (integer_minus_onep (op)
1298 || real_minus_onep (op))
1300 gimple_assign_set_rhs_code
1301 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1302 break;
1307 name = gimple_assign_rhs1 (stmt);
1309 /* If this is the operation we look for and one of the operands
1310 is ours simply propagate the other operand into the stmts
1311 single use. */
1312 if (gimple_assign_rhs_code (stmt) == opcode
1313 && (name == op
1314 || gimple_assign_rhs2 (stmt) == op))
1316 if (name == op)
1317 name = gimple_assign_rhs2 (stmt);
1318 if (stmts_to_fix.length () > 0)
1319 stmts_to_fix.pop ();
1320 propagate_op_to_single_use (name, stmt, def);
1321 break;
1324 /* We might have a multiply of two __builtin_pow* calls, and
1325 the operand might be hiding in the rightmost one. Likewise
1326 this can happen for a negate. */
1327 if (opcode == MULT_EXPR
1328 && gimple_assign_rhs_code (stmt) == opcode
1329 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1330 && has_single_use (gimple_assign_rhs2 (stmt)))
1332 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1333 if (stmt_is_power_of_op (stmt2, op))
1335 if (decrement_power (stmt2) == 1)
1336 propagate_op_to_single_use (op, stmt2, def);
1337 else
1338 stmts_to_fix.safe_push (stmt2);
1339 break;
1341 else if (is_gimple_assign (stmt2)
1342 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1344 if (gimple_assign_rhs1 (stmt2) == op)
1346 tree cst = build_minus_one_cst (TREE_TYPE (op));
1347 propagate_op_to_single_use (cst, stmt2, def);
1348 break;
1350 else if (integer_minus_onep (op)
1351 || real_minus_onep (op))
1353 stmts_to_fix.safe_push (stmt2);
1354 gimple_assign_set_rhs_code
1355 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1356 break;
1361 /* Continue walking the chain. */
1362 gcc_assert (name != op
1363 && TREE_CODE (name) == SSA_NAME);
1364 stmt = SSA_NAME_DEF_STMT (name);
1365 stmts_to_fix.safe_push (stmt);
1367 while (1);
1369 if (stmts_to_fix.length () > 0 || *def == orig_def)
1370 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1373 /* Returns true if statement S1 dominates statement S2. Like
1374 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1376 static bool
1377 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1379 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1381 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1382 SSA_NAME. Assume it lives at the beginning of function and
1383 thus dominates everything. */
1384 if (!bb1 || s1 == s2)
1385 return true;
1387 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1388 if (!bb2)
1389 return false;
1391 if (bb1 == bb2)
1393 /* PHIs in the same basic block are assumed to be
1394 executed all in parallel; if only one stmt is a PHI,
1395 it dominates the other stmt in the same basic block. */
1396 if (gimple_code (s1) == GIMPLE_PHI)
1397 return true;
1399 if (gimple_code (s2) == GIMPLE_PHI)
1400 return false;
1402 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1404 if (gimple_uid (s1) < gimple_uid (s2))
1405 return true;
1407 if (gimple_uid (s1) > gimple_uid (s2))
1408 return false;
1410 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1411 unsigned int uid = gimple_uid (s1);
1412 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1414 gimple *s = gsi_stmt (gsi);
1415 if (gimple_uid (s) != uid)
1416 break;
1417 if (s == s2)
1418 return true;
1421 return false;
1424 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1427 /* Insert STMT after INSERT_POINT. */
1429 static void
1430 insert_stmt_after (gimple *stmt, gimple *insert_point)
1432 gimple_stmt_iterator gsi;
1433 basic_block bb;
1435 if (gimple_code (insert_point) == GIMPLE_PHI)
1436 bb = gimple_bb (insert_point);
1437 else if (!stmt_ends_bb_p (insert_point))
1439 gsi = gsi_for_stmt (insert_point);
1440 gimple_set_uid (stmt, gimple_uid (insert_point));
1441 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1442 return;
1444 else
1445 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1446 thus if it must end a basic block, it should be a call that can
1447 throw, or some assignment that can throw. If it throws, the LHS
1448 of it will not be initialized though, so only valid places using
1449 the SSA_NAME should be dominated by the fallthru edge. */
1450 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1451 gsi = gsi_after_labels (bb);
1452 if (gsi_end_p (gsi))
1454 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1455 gimple_set_uid (stmt,
1456 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1458 else
1459 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1460 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1463 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1464 the result. Places the statement after the definition of either
1465 OP1 or OP2. Returns the new statement. */
1467 static gimple *
1468 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1470 gimple *op1def = NULL, *op2def = NULL;
1471 gimple_stmt_iterator gsi;
1472 tree op;
1473 gassign *sum;
1475 /* Create the addition statement. */
1476 op = make_ssa_name (type);
1477 sum = gimple_build_assign (op, opcode, op1, op2);
1479 /* Find an insertion place and insert. */
1480 if (TREE_CODE (op1) == SSA_NAME)
1481 op1def = SSA_NAME_DEF_STMT (op1);
1482 if (TREE_CODE (op2) == SSA_NAME)
1483 op2def = SSA_NAME_DEF_STMT (op2);
1484 if ((!op1def || gimple_nop_p (op1def))
1485 && (!op2def || gimple_nop_p (op2def)))
1487 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1488 if (gsi_end_p (gsi))
1490 gimple_stmt_iterator gsi2
1491 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1492 gimple_set_uid (sum,
1493 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1495 else
1496 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1497 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1499 else
1501 gimple *insert_point;
1502 if ((!op1def || gimple_nop_p (op1def))
1503 || (op2def && !gimple_nop_p (op2def)
1504 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1505 insert_point = op2def;
1506 else
1507 insert_point = op1def;
1508 insert_stmt_after (sum, insert_point);
1510 update_stmt (sum);
1512 return sum;
1515 /* Perform un-distribution of divisions and multiplications.
1516 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1517 to (A + B) / X for real X.
1519 The algorithm is organized as follows.
1521 - First we walk the addition chain *OPS looking for summands that
1522 are defined by a multiplication or a real division. This results
1523 in the candidates bitmap with relevant indices into *OPS.
1525 - Second we build the chains of multiplications or divisions for
1526 these candidates, counting the number of occurrences of (operand, code)
1527 pairs in all of the candidates chains.
1529 - Third we sort the (operand, code) pairs by number of occurrence and
1530 process them starting with the pair with the most uses.
1532 * For each such pair we walk the candidates again to build a
1533 second candidate bitmap noting all multiplication/division chains
1534 that have at least one occurrence of (operand, code).
1536 * We build an alternate addition chain only covering these
1537 candidates with one (operand, code) operation removed from their
1538 multiplication/division chain.
1540 * The first candidate gets replaced by the alternate addition chain
1541 multiplied/divided by the operand.
1543 * All candidate chains get disabled for further processing and
1544 processing of (operand, code) pairs continues.
1546 The alternate addition chains built are re-processed by the main
1547 reassociation algorithm which allows optimizing a * x * y + b * y * x
1548 to (a + b) * x * y in one invocation of the reassociation pass. */
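/* For example (a sketch, not taken from an actual dump):

     _1 = a_2 * x_3;
     _4 = b_5 * x_3;
     _6 = _1 + _4;

   is effectively rewritten as

     _7 = a_2 + b_5;
     _6 = _7 * x_3;

   leaving one fewer multiplication in the addition chain.  */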
1550 static bool
1551 undistribute_ops_list (enum tree_code opcode,
1552 vec<operand_entry *> *ops, struct loop *loop)
1554 unsigned int length = ops->length ();
1555 operand_entry *oe1;
1556 unsigned i, j;
1557 unsigned nr_candidates, nr_candidates2;
1558 sbitmap_iterator sbi0;
1559 vec<operand_entry *> *subops;
1560 bool changed = false;
1561 unsigned int next_oecount_id = 0;
1563 if (length <= 1
1564 || opcode != PLUS_EXPR)
1565 return false;
1567 /* Build a list of candidates to process. */
1568 auto_sbitmap candidates (length);
1569 bitmap_clear (candidates);
1570 nr_candidates = 0;
1571 FOR_EACH_VEC_ELT (*ops, i, oe1)
1573 enum tree_code dcode;
1574 gimple *oe1def;
1576 if (TREE_CODE (oe1->op) != SSA_NAME)
1577 continue;
1578 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1579 if (!is_gimple_assign (oe1def))
1580 continue;
1581 dcode = gimple_assign_rhs_code (oe1def);
1582 if ((dcode != MULT_EXPR
1583 && dcode != RDIV_EXPR)
1584 || !is_reassociable_op (oe1def, dcode, loop))
1585 continue;
1587 bitmap_set_bit (candidates, i);
1588 nr_candidates++;
1591 if (nr_candidates < 2)
1592 return false;
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "searching for un-distribute opportunities ");
1597 print_generic_expr (dump_file,
1598 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1599 fprintf (dump_file, " %d\n", nr_candidates);
1602 /* Build linearized sub-operand lists and the counting table. */
1603 cvec.create (0);
1605 hash_table<oecount_hasher> ctable (15);
1607 /* ??? Macro arguments cannot have multi-argument template types in
1608 them. This typedef is needed to work around that limitation. */
1609 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1610 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1611 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1613 gimple *oedef;
1614 enum tree_code oecode;
1615 unsigned j;
1617 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1618 oecode = gimple_assign_rhs_code (oedef);
1619 linearize_expr_tree (&subops[i], oedef,
1620 associative_tree_code (oecode), false);
1622 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1624 oecount c;
1625 int *slot;
1626 int idx;
1627 c.oecode = oecode;
1628 c.cnt = 1;
1629 c.id = next_oecount_id++;
1630 c.op = oe1->op;
1631 cvec.safe_push (c);
1632 idx = cvec.length () + 41;
1633 slot = ctable.find_slot (idx, INSERT);
1634 if (!*slot)
1636 *slot = idx;
1638 else
1640 cvec.pop ();
1641 cvec[*slot - 42].cnt++;
1646 /* Sort the counting table. */
1647 cvec.qsort (oecount_cmp);
1649 if (dump_file && (dump_flags & TDF_DETAILS))
1651 oecount *c;
1652 fprintf (dump_file, "Candidates:\n");
1653 FOR_EACH_VEC_ELT (cvec, j, c)
1655 fprintf (dump_file, " %u %s: ", c->cnt,
1656 c->oecode == MULT_EXPR
1657 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1658 print_generic_expr (dump_file, c->op);
1659 fprintf (dump_file, "\n");
1663 /* Process the (operand, code) pairs in order of most occurrence. */
1664 auto_sbitmap candidates2 (length);
1665 while (!cvec.is_empty ())
1667 oecount *c = &cvec.last ();
1668 if (c->cnt < 2)
1669 break;
1671 /* Now collect the operands in the outer chain that contain
1672 the common operand in their inner chain. */
1673 bitmap_clear (candidates2);
1674 nr_candidates2 = 0;
1675 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1677 gimple *oedef;
1678 enum tree_code oecode;
1679 unsigned j;
1680 tree op = (*ops)[i]->op;
1682 /* If we undistributed in this chain already this may be
1683 a constant. */
1684 if (TREE_CODE (op) != SSA_NAME)
1685 continue;
1687 oedef = SSA_NAME_DEF_STMT (op);
1688 oecode = gimple_assign_rhs_code (oedef);
1689 if (oecode != c->oecode)
1690 continue;
1692 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1694 if (oe1->op == c->op)
1696 bitmap_set_bit (candidates2, i);
1697 ++nr_candidates2;
1698 break;
1703 if (nr_candidates2 >= 2)
1705 operand_entry *oe1, *oe2;
1706 gimple *prod;
1707 int first = bitmap_first_set_bit (candidates2);
1709 /* Build the new addition chain. */
1710 oe1 = (*ops)[first];
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Building (");
1714 print_generic_expr (dump_file, oe1->op);
1716 zero_one_operation (&oe1->op, c->oecode, c->op);
1717 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1719 gimple *sum;
1720 oe2 = (*ops)[i];
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, " + ");
1724 print_generic_expr (dump_file, oe2->op);
1726 zero_one_operation (&oe2->op, c->oecode, c->op);
1727 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1728 oe1->op, oe2->op, opcode);
1729 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1730 oe2->rank = 0;
1731 oe1->op = gimple_get_lhs (sum);
1734 /* Apply the multiplication/division. */
1735 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1736 oe1->op, c->op, c->oecode);
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1739 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1740 print_generic_expr (dump_file, c->op);
1741 fprintf (dump_file, "\n");
1744 /* Record it in the addition chain and disable further
1745 undistribution with this op. */
1746 oe1->op = gimple_assign_lhs (prod);
1747 oe1->rank = get_rank (oe1->op);
1748 subops[first].release ();
1750 changed = true;
1753 cvec.pop ();
1756 for (i = 0; i < ops->length (); ++i)
1757 subops[i].release ();
1758 free (subops);
1759 cvec.release ();
1761 return changed;
1764 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1765 expression, examine the other OPS to see if any of them are comparisons
1766 of the same values, which we may be able to combine or eliminate.
1767 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1769 static bool
1770 eliminate_redundant_comparison (enum tree_code opcode,
1771 vec<operand_entry *> *ops,
1772 unsigned int currindex,
1773 operand_entry *curr)
1775 tree op1, op2;
1776 enum tree_code lcode, rcode;
1777 gimple *def1, *def2;
1778 int i;
1779 operand_entry *oe;
1781 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1782 return false;
1784 /* Check that CURR is a comparison. */
1785 if (TREE_CODE (curr->op) != SSA_NAME)
1786 return false;
1787 def1 = SSA_NAME_DEF_STMT (curr->op);
1788 if (!is_gimple_assign (def1))
1789 return false;
1790 lcode = gimple_assign_rhs_code (def1);
1791 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1792 return false;
1793 op1 = gimple_assign_rhs1 (def1);
1794 op2 = gimple_assign_rhs2 (def1);
1796 /* Now look for a similar comparison in the remaining OPS. */
1797 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1799 tree t;
1801 if (TREE_CODE (oe->op) != SSA_NAME)
1802 continue;
1803 def2 = SSA_NAME_DEF_STMT (oe->op);
1804 if (!is_gimple_assign (def2))
1805 continue;
1806 rcode = gimple_assign_rhs_code (def2);
1807 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1808 continue;
1810 /* If we got here, we have a match. See if we can combine the
1811 two comparisons. */
1812 if (opcode == BIT_IOR_EXPR)
1813 t = maybe_fold_or_comparisons (lcode, op1, op2,
1814 rcode, gimple_assign_rhs1 (def2),
1815 gimple_assign_rhs2 (def2));
1816 else
1817 t = maybe_fold_and_comparisons (lcode, op1, op2,
1818 rcode, gimple_assign_rhs1 (def2),
1819 gimple_assign_rhs2 (def2));
1820 if (!t)
1821 continue;
1823 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1824 always give us a boolean_type_node value back. If the original
1825 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1826 we need to convert. */
1827 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1828 t = fold_convert (TREE_TYPE (curr->op), t);
1830 if (TREE_CODE (t) != INTEGER_CST
1831 && !operand_equal_p (t, curr->op, 0))
1833 enum tree_code subcode;
1834 tree newop1, newop2;
1835 if (!COMPARISON_CLASS_P (t))
1836 continue;
1837 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1838 STRIP_USELESS_TYPE_CONVERSION (newop1);
1839 STRIP_USELESS_TYPE_CONVERSION (newop2);
1840 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1841 continue;
1844 if (dump_file && (dump_flags & TDF_DETAILS))
1846 fprintf (dump_file, "Equivalence: ");
1847 print_generic_expr (dump_file, curr->op);
1848 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1849 print_generic_expr (dump_file, oe->op);
1850 fprintf (dump_file, " -> ");
1851 print_generic_expr (dump_file, t);
1852 fprintf (dump_file, "\n");
1855 /* Now we can delete oe, as it has been subsumed by the new combined
1856 expression t. */
1857 ops->ordered_remove (i);
1858 reassociate_stats.ops_eliminated ++;
1860 /* If t is the same as curr->op, we're done. Otherwise we must
1861 replace curr->op with t. Special case is if we got a constant
1862 back, in which case we add it to the end instead of in place of
1863 the current entry. */
1864 if (TREE_CODE (t) == INTEGER_CST)
1866 ops->ordered_remove (currindex);
1867 add_to_ops_vec (ops, t);
1869 else if (!operand_equal_p (t, curr->op, 0))
1871 gimple *sum;
1872 enum tree_code subcode;
1873 tree newop1;
1874 tree newop2;
1875 gcc_assert (COMPARISON_CLASS_P (t));
1876 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1877 STRIP_USELESS_TYPE_CONVERSION (newop1);
1878 STRIP_USELESS_TYPE_CONVERSION (newop2);
1879 gcc_checking_assert (is_gimple_val (newop1)
1880 && is_gimple_val (newop2));
1881 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1882 curr->op = gimple_get_lhs (sum);
1884 return true;
1887 return false;
1891 /* Transform repeated addition of same values into multiply with
1892 constant. */
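/* For example (a sketch with made-up SSA names): three equal summands
   x_1 + x_1 + x_1 in an operand list are replaced by a single entry
   computing 3 * x_1; for floating-point types this is only done under
   -funsafe-math-optimizations.  */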
1893 static bool
1894 transform_add_to_multiply (vec<operand_entry *> *ops)
1896 operand_entry *oe;
1897 tree op = NULL_TREE;
1898 int j;
1899 int i, start = -1, end = 0, count = 0;
1900 auto_vec<std::pair <int, int> > indxs;
1901 bool changed = false;
1903 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1904 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1905 || !flag_unsafe_math_optimizations))
1906 return false;
1908 /* Look for repeated operands. */
1909 FOR_EACH_VEC_ELT (*ops, i, oe)
1911 if (start == -1)
1913 count = 1;
1914 op = oe->op;
1915 start = i;
1917 else if (operand_equal_p (oe->op, op, 0))
1919 count++;
1920 end = i;
1922 else
1924 if (count > 1)
1925 indxs.safe_push (std::make_pair (start, end));
1926 count = 1;
1927 op = oe->op;
1928 start = i;
1932 if (count > 1)
1933 indxs.safe_push (std::make_pair (start, end));
1935 for (j = indxs.length () - 1; j >= 0; --j)
1937 /* Convert repeated operand addition to multiplication. */
1938 start = indxs[j].first;
1939 end = indxs[j].second;
1940 op = (*ops)[start]->op;
1941 count = end - start + 1;
1942 for (i = end; i >= start; --i)
1943 ops->unordered_remove (i);
1944 tree tmp = make_ssa_name (TREE_TYPE (op));
1945 tree cst = build_int_cst (integer_type_node, count);
1946 gassign *mul_stmt
1947 = gimple_build_assign (tmp, MULT_EXPR,
1948 op, fold_convert (TREE_TYPE (op), cst));
1949 gimple_set_visited (mul_stmt, true);
1950 add_to_ops_vec (ops, tmp, mul_stmt);
1951 changed = true;
1954 return changed;
1958 /* Perform various identities and other optimizations on the list of
1959 operand entries, stored in OPS. The tree code for the binary
1960 operation between all the operands is OPCODE. */
1962 static void
1963 optimize_ops_list (enum tree_code opcode,
1964 vec<operand_entry *> *ops)
1966 unsigned int length = ops->length ();
1967 unsigned int i;
1968 operand_entry *oe;
1969 operand_entry *oelast = NULL;
1970 bool iterate = false;
1972 if (length == 1)
1973 return;
1975 oelast = ops->last ();
1977 /* If the last two are constants, pop the constants off, merge them
1978 and try the next two. */
1979 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1981 operand_entry *oelm1 = (*ops)[length - 2];
1983 if (oelm1->rank == 0
1984 && is_gimple_min_invariant (oelm1->op)
1985 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1986 TREE_TYPE (oelast->op)))
1988 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1989 oelm1->op, oelast->op);
1991 if (folded && is_gimple_min_invariant (folded))
1993 if (dump_file && (dump_flags & TDF_DETAILS))
1994 fprintf (dump_file, "Merging constants\n");
1996 ops->pop ();
1997 ops->pop ();
1999 add_to_ops_vec (ops, folded);
2000 reassociate_stats.constants_eliminated++;
2002 optimize_ops_list (opcode, ops);
2003 return;
2008 eliminate_using_constants (opcode, ops);
2009 oelast = NULL;
2011 for (i = 0; ops->iterate (i, &oe);)
2013 bool done = false;
2015 if (eliminate_not_pairs (opcode, ops, i, oe))
2016 return;
2017 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2018 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2019 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2021 if (done)
2022 return;
2023 iterate = true;
2024 oelast = NULL;
2025 continue;
2027 oelast = oe;
2028 i++;
2031 length = ops->length ();
2032 oelast = ops->last ();
2034 if (iterate)
2035 optimize_ops_list (opcode, ops);
2038 /* The following functions are subroutines to optimize_range_tests and allow
2039 it to try to change a logical combination of comparisons into a range
2040 test.
2042 For example, both
2043 X == 2 || X == 5 || X == 3 || X == 4
2044 and
2045 X >= 2 && X <= 5
2046 are converted to
2047 (unsigned) (X - 2) <= 3
2049 For more information see comments above fold_range_test in fold-const.c,
2050 this implementation is for GIMPLE. */
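/* The single comparison works because for X in [2, 5] the subtraction
yields 0 .. 3, while any X below 2 wraps around to a huge unsigned value
and any X above 5 stays above 3, so exactly the original four values
satisfy (unsigned) (X - 2) <= 3. */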
2052 struct range_entry
2054 tree exp;
2055 tree low;
2056 tree high;
2057 bool in_p;
2058 bool strict_overflow_p;
2059 unsigned int idx, next;
2062 /* This is similar to make_range in fold-const.c, but on top of
2063 GIMPLE instead of trees. If EXP is non-NULL, it should be
2064 an SSA_NAME and STMT argument is ignored, otherwise STMT
2065 argument should be a GIMPLE_COND. */
2067 static void
2068 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2070 int in_p;
2071 tree low, high;
2072 bool is_bool, strict_overflow_p;
2074 r->exp = NULL_TREE;
2075 r->in_p = false;
2076 r->strict_overflow_p = false;
2077 r->low = NULL_TREE;
2078 r->high = NULL_TREE;
2079 if (exp != NULL_TREE
2080 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2081 return;
2083 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2084 and see if we can refine the range. Some of the cases below may not
2085 happen, but it doesn't seem worth worrying about this. We "continue"
2086 the outer loop when we've changed something; otherwise we "break"
2087 the switch, which will "break" the while. */
2088 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2089 high = low;
2090 in_p = 0;
2091 strict_overflow_p = false;
2092 is_bool = false;
2093 if (exp == NULL_TREE)
2094 is_bool = true;
2095 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2097 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2098 is_bool = true;
2099 else
2100 return;
2102 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2103 is_bool = true;
2105 while (1)
2107 enum tree_code code;
2108 tree arg0, arg1, exp_type;
2109 tree nexp;
2110 location_t loc;
2112 if (exp != NULL_TREE)
2114 if (TREE_CODE (exp) != SSA_NAME
2115 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2116 break;
2118 stmt = SSA_NAME_DEF_STMT (exp);
2119 if (!is_gimple_assign (stmt))
2120 break;
2122 code = gimple_assign_rhs_code (stmt);
2123 arg0 = gimple_assign_rhs1 (stmt);
2124 arg1 = gimple_assign_rhs2 (stmt);
2125 exp_type = TREE_TYPE (exp);
2127 else
2129 code = gimple_cond_code (stmt);
2130 arg0 = gimple_cond_lhs (stmt);
2131 arg1 = gimple_cond_rhs (stmt);
2132 exp_type = boolean_type_node;
2135 if (TREE_CODE (arg0) != SSA_NAME)
2136 break;
2137 loc = gimple_location (stmt);
2138 switch (code)
2140 case BIT_NOT_EXPR:
2141 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2142 /* Ensure the range is either +[-,0], +[0,0],
2143 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2144 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2145 or similar expression of unconditional true or
2146 false, it should not be negated. */
2147 && ((high && integer_zerop (high))
2148 || (low && integer_onep (low))))
2150 in_p = !in_p;
2151 exp = arg0;
2152 continue;
2154 break;
2155 case SSA_NAME:
2156 exp = arg0;
2157 continue;
2158 CASE_CONVERT:
2159 if (is_bool)
2160 goto do_default;
2161 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2163 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2164 is_bool = true;
2165 else
2166 return;
2168 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2169 is_bool = true;
2170 goto do_default;
2171 case EQ_EXPR:
2172 case NE_EXPR:
2173 case LT_EXPR:
2174 case LE_EXPR:
2175 case GE_EXPR:
2176 case GT_EXPR:
2177 is_bool = true;
2178 /* FALLTHRU */
2179 default:
2180 if (!is_bool)
2181 return;
2182 do_default:
2183 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2184 &low, &high, &in_p,
2185 &strict_overflow_p);
2186 if (nexp != NULL_TREE)
2188 exp = nexp;
2189 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2190 continue;
2192 break;
2194 break;
2196 if (is_bool)
2198 r->exp = exp;
2199 r->in_p = in_p;
2200 r->low = low;
2201 r->high = high;
2202 r->strict_overflow_p = strict_overflow_p;
2206 /* Comparison function for qsort. Sort entries
2207 without SSA_NAME exp first, then with SSA_NAMEs sorted
2208 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2209 by increasing ->low and if ->low is the same, by increasing
2210 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2211 means maximum. */
2213 static int
2214 range_entry_cmp (const void *a, const void *b)
2216 const struct range_entry *p = (const struct range_entry *) a;
2217 const struct range_entry *q = (const struct range_entry *) b;
2219 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2221 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2223 /* Group range_entries for the same SSA_NAME together. */
2224 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2225 return -1;
2226 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2227 return 1;
2228 /* If ->low is different, NULL low goes first, then by
2229 ascending low. */
2230 if (p->low != NULL_TREE)
2232 if (q->low != NULL_TREE)
2234 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2235 p->low, q->low);
2236 if (tem && integer_onep (tem))
2237 return -1;
2238 tem = fold_binary (GT_EXPR, boolean_type_node,
2239 p->low, q->low);
2240 if (tem && integer_onep (tem))
2241 return 1;
2243 else
2244 return 1;
2246 else if (q->low != NULL_TREE)
2247 return -1;
2248 /* If ->high is different, NULL high goes last, before that by
2249 ascending high. */
2250 if (p->high != NULL_TREE)
2252 if (q->high != NULL_TREE)
2254 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2255 p->high, q->high);
2256 if (tem && integer_onep (tem))
2257 return -1;
2258 tem = fold_binary (GT_EXPR, boolean_type_node,
2259 p->high, q->high);
2260 if (tem && integer_onep (tem))
2261 return 1;
2263 else
2264 return -1;
2266 else if (q->high != NULL_TREE)
2267 return 1;
2268 /* If both ranges are the same, sort below by ascending idx. */
2270 else
2271 return 1;
2273 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2274 return -1;
2276 if (p->idx < q->idx)
2277 return -1;
2278 else
2280 gcc_checking_assert (p->idx > q->idx);
2281 return 1;
2285 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2286 insert needed statements BEFORE or after GSI. */
2288 static tree
2289 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2291 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2292 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2293 if (TREE_CODE (ret) != SSA_NAME)
2295 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2296 if (before)
2297 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2298 else
2299 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2300 ret = gimple_assign_lhs (g);
2302 return ret;
2305 /* Helper routine of optimize_range_test.
2306 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2307 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2308 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2309 is NULL, OTHERRANGEP must not be NULL; OTHERRANGEP then points to
2310 an array of COUNT pointers to other ranges. Return
2311 true if the range merge has been successful.
2312 If OPCODE is ERROR_MARK, this is called from within
2313 maybe_optimize_range_tests and is performing inter-bb range optimization.
2314 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2315 oe->rank. */
2317 static bool
2318 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2319 struct range_entry **otherrangep,
2320 unsigned int count, enum tree_code opcode,
2321 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2322 bool in_p, tree low, tree high, bool strict_overflow_p)
2324 operand_entry *oe = (*ops)[range->idx];
2325 tree op = oe->op;
2326 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2327 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2328 location_t loc = gimple_location (stmt);
2329 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2330 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2331 in_p, low, high);
2332 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2333 gimple_stmt_iterator gsi;
2334 unsigned int i, uid;
2336 if (tem == NULL_TREE)
2337 return false;
2339 /* If op is default def SSA_NAME, there is no place to insert the
2340 new comparison. Give up, unless we can use OP itself as the
2341 range test. */
2342 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2344 if (op == range->exp
2345 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2346 || TREE_CODE (optype) == BOOLEAN_TYPE)
2347 && (op == tem
2348 || (TREE_CODE (tem) == EQ_EXPR
2349 && TREE_OPERAND (tem, 0) == op
2350 && integer_onep (TREE_OPERAND (tem, 1))))
2351 && opcode != BIT_IOR_EXPR
2352 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2354 stmt = NULL;
2355 tem = op;
2357 else
2358 return false;
2361 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2362 warning_at (loc, OPT_Wstrict_overflow,
2363 "assuming signed overflow does not occur "
2364 "when simplifying range test");
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2368 struct range_entry *r;
2369 fprintf (dump_file, "Optimizing range tests ");
2370 print_generic_expr (dump_file, range->exp);
2371 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2372 print_generic_expr (dump_file, range->low);
2373 fprintf (dump_file, ", ");
2374 print_generic_expr (dump_file, range->high);
2375 fprintf (dump_file, "]");
2376 for (i = 0; i < count; i++)
2378 if (otherrange)
2379 r = otherrange + i;
2380 else
2381 r = otherrangep[i];
2382 if (r->exp
2383 && r->exp != range->exp
2384 && TREE_CODE (r->exp) == SSA_NAME)
2386 fprintf (dump_file, " and ");
2387 print_generic_expr (dump_file, r->exp);
2389 else
2390 fprintf (dump_file, " and");
2391 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2392 print_generic_expr (dump_file, r->low);
2393 fprintf (dump_file, ", ");
2394 print_generic_expr (dump_file, r->high);
2395 fprintf (dump_file, "]");
2397 fprintf (dump_file, "\n into ");
2398 print_generic_expr (dump_file, tem);
2399 fprintf (dump_file, "\n");
2402 if (opcode == BIT_IOR_EXPR
2403 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2404 tem = invert_truthvalue_loc (loc, tem);
2406 tem = fold_convert_loc (loc, optype, tem);
2407 if (stmt)
2409 gsi = gsi_for_stmt (stmt);
2410 uid = gimple_uid (stmt);
2412 else
2414 gsi = gsi_none ();
2415 uid = 0;
2417 if (stmt == NULL)
2418 gcc_checking_assert (tem == op);
2419 /* In rare cases range->exp can be equal to lhs of stmt.
2420 In that case we have to insert after the stmt rather than before
2421 it. If stmt is a PHI, insert it at the start of the basic block. */
2422 else if (op != range->exp)
2424 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2425 tem = force_into_ssa_name (&gsi, tem, true);
2426 gsi_prev (&gsi);
2428 else if (gimple_code (stmt) != GIMPLE_PHI)
2430 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2431 tem = force_into_ssa_name (&gsi, tem, false);
2433 else
2435 gsi = gsi_after_labels (gimple_bb (stmt));
2436 if (!gsi_end_p (gsi))
2437 uid = gimple_uid (gsi_stmt (gsi));
2438 else
2440 gsi = gsi_start_bb (gimple_bb (stmt));
2441 uid = 1;
2442 while (!gsi_end_p (gsi))
2444 uid = gimple_uid (gsi_stmt (gsi));
2445 gsi_next (&gsi);
2448 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2449 tem = force_into_ssa_name (&gsi, tem, true);
2450 if (gsi_end_p (gsi))
2451 gsi = gsi_last_bb (gimple_bb (stmt));
2452 else
2453 gsi_prev (&gsi);
2455 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2456 if (gimple_uid (gsi_stmt (gsi)))
2457 break;
2458 else
2459 gimple_set_uid (gsi_stmt (gsi), uid);
2461 oe->op = tem;
2462 range->exp = exp;
2463 range->low = low;
2464 range->high = high;
2465 range->in_p = in_p;
2466 range->strict_overflow_p = false;
2468 for (i = 0; i < count; i++)
2470 if (otherrange)
2471 range = otherrange + i;
2472 else
2473 range = otherrangep[i];
2474 oe = (*ops)[range->idx];
2475 /* Now change all the other range test immediate uses, so that
2476 those tests will be optimized away. */
2477 if (opcode == ERROR_MARK)
2479 if (oe->op)
2480 oe->op = build_int_cst (TREE_TYPE (oe->op),
2481 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2482 else
2483 oe->op = (oe->rank == BIT_IOR_EXPR
2484 ? boolean_false_node : boolean_true_node);
2486 else
2487 oe->op = error_mark_node;
2488 range->exp = NULL_TREE;
2489 range->low = NULL_TREE;
2490 range->high = NULL_TREE;
2492 return true;
2495 /* Optimize X == CST1 || X == CST2
2496 if popcount (CST1 ^ CST2) == 1 into
2497 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2498 Similarly for ranges. E.g.
2499 X != 2 && X != 3 && X != 10 && X != 11
2500 will be transformed by the previous optimization into
2501 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2502 and this loop can transform that into
2503 !(((X & ~8) - 2U) <= 1U). */
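/* As a concrete instance, for X == 43 || X == 47 we have 43 ^ 47 == 4,
a power of two, so the pair becomes (X & ~4) == 43, a single comparison
that accepts exactly 43 and 47. */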
2505 static bool
2506 optimize_range_tests_xor (enum tree_code opcode, tree type,
2507 tree lowi, tree lowj, tree highi, tree highj,
2508 vec<operand_entry *> *ops,
2509 struct range_entry *rangei,
2510 struct range_entry *rangej)
2512 tree lowxor, highxor, tem, exp;
2513 /* Check lowi ^ lowj == highi ^ highj and
2514 popcount (lowi ^ lowj) == 1. */
2515 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2516 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2517 return false;
2518 if (!integer_pow2p (lowxor))
2519 return false;
2520 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2521 if (!tree_int_cst_equal (lowxor, highxor))
2522 return false;
2524 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2525 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2526 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2527 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2528 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2529 NULL, rangei->in_p, lowj, highj,
2530 rangei->strict_overflow_p
2531 || rangej->strict_overflow_p))
2532 return true;
2533 return false;
2536 /* Optimize X == CST1 || X == CST2
2537 if popcount (CST2 - CST1) == 1 into
2538 ((X - CST1) & ~(CST2 - CST1)) == 0.
2539 Similarly for ranges. E.g.
2540 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2541 || X == 75 || X == 45
2542 will be transformed by the previous optimization into
2543 (X - 43U) <= 3U || (X - 75U) <= 3U
2544 and this loop can transform that into
2545 ((X - 43U) & ~(75U - 43U)) <= 3U. */
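/* As a concrete instance, for X == 43 || X == 75 we have 75 - 43 == 32,
a power of two, so the pair becomes ((X - 43U) & ~32U) == 0, which holds
exactly for X == 43 (0 & ~32) and X == 75 (32 & ~32). */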
2546 static bool
2547 optimize_range_tests_diff (enum tree_code opcode, tree type,
2548 tree lowi, tree lowj, tree highi, tree highj,
2549 vec<operand_entry *> *ops,
2550 struct range_entry *rangei,
2551 struct range_entry *rangej)
2553 tree tem1, tem2, mask;
2554 /* Check highi - lowi == highj - lowj. */
2555 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2556 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2557 return false;
2558 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2559 if (!tree_int_cst_equal (tem1, tem2))
2560 return false;
2561 /* Check popcount (lowj - lowi) == 1. */
2562 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2563 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2564 return false;
2565 if (!integer_pow2p (tem1))
2566 return false;
2568 type = unsigned_type_for (type);
2569 tem1 = fold_convert (type, tem1);
2570 tem2 = fold_convert (type, tem2);
2571 lowi = fold_convert (type, lowi);
2572 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2573 tem1 = fold_build2 (MINUS_EXPR, type,
2574 fold_convert (type, rangei->exp), lowi);
2575 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2576 lowj = build_int_cst (type, 0);
2577 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2578 NULL, rangei->in_p, lowj, tem2,
2579 rangei->strict_overflow_p
2580 || rangej->strict_overflow_p))
2581 return true;
2582 return false;
2585 /* Perform the checks common to optimize_range_tests_xor and
2586 optimize_range_tests_diff.
2587 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2588 otherwise call optimize_range_tests_diff. */
2590 static bool
2591 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2592 bool optimize_xor, vec<operand_entry *> *ops,
2593 struct range_entry *ranges)
2595 int i, j;
2596 bool any_changes = false;
2597 for (i = first; i < length; i++)
2599 tree lowi, highi, lowj, highj, type, tem;
2601 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2602 continue;
2603 type = TREE_TYPE (ranges[i].exp);
2604 if (!INTEGRAL_TYPE_P (type))
2605 continue;
2606 lowi = ranges[i].low;
2607 if (lowi == NULL_TREE)
2608 lowi = TYPE_MIN_VALUE (type);
2609 highi = ranges[i].high;
2610 if (highi == NULL_TREE)
2611 continue;
2612 for (j = i + 1; j < length && j < i + 64; j++)
2614 bool changes;
2615 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2616 continue;
2617 lowj = ranges[j].low;
2618 if (lowj == NULL_TREE)
2619 continue;
2620 highj = ranges[j].high;
2621 if (highj == NULL_TREE)
2622 highj = TYPE_MAX_VALUE (type);
2623 /* Check lowj > highi. */
2624 tem = fold_binary (GT_EXPR, boolean_type_node,
2625 lowj, highi);
2626 if (tem == NULL_TREE || !integer_onep (tem))
2627 continue;
2628 if (optimize_xor)
2629 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2630 highi, highj, ops,
2631 ranges + i, ranges + j);
2632 else
2633 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2634 highi, highj, ops,
2635 ranges + i, ranges + j);
2636 if (changes)
2638 any_changes = true;
2639 break;
2643 return any_changes;
2646 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2647 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2648 EXP on success, NULL otherwise. */
2650 static tree
2651 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2652 wide_int *mask, tree *totallowp)
2654 tree tem = int_const_binop (MINUS_EXPR, high, low);
2655 if (tem == NULL_TREE
2656 || TREE_CODE (tem) != INTEGER_CST
2657 || TREE_OVERFLOW (tem)
2658 || tree_int_cst_sgn (tem) == -1
2659 || compare_tree_int (tem, prec) != -1)
2660 return NULL_TREE;
2662 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2663 *mask = wi::shifted_mask (0, max, false, prec);
2664 if (TREE_CODE (exp) == BIT_AND_EXPR
2665 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2667 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2668 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2669 if (wi::popcount (msk) == 1
2670 && wi::ltu_p (msk, prec - max))
2672 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2673 max += msk.to_uhwi ();
2674 exp = TREE_OPERAND (exp, 0);
2675 if (integer_zerop (low)
2676 && TREE_CODE (exp) == PLUS_EXPR
2677 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2679 tree ret = TREE_OPERAND (exp, 0);
2680 STRIP_NOPS (ret);
2681 widest_int bias
2682 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2683 TYPE_PRECISION (TREE_TYPE (low))));
2684 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2685 if (totallowp)
2687 *totallowp = tbias;
2688 return ret;
2690 else if (!tree_int_cst_lt (totallow, tbias))
2691 return NULL_TREE;
2692 bias = wi::to_widest (tbias);
2693 bias -= wi::to_widest (totallow);
2694 if (bias >= 0 && bias < prec - max)
2696 *mask = wi::lshift (*mask, bias);
2697 return ret;
2702 if (totallowp)
2703 return exp;
2704 if (!tree_int_cst_lt (totallow, low))
2705 return exp;
2706 tem = int_const_binop (MINUS_EXPR, low, totallow);
2707 if (tem == NULL_TREE
2708 || TREE_CODE (tem) != INTEGER_CST
2709 || TREE_OVERFLOW (tem)
2710 || compare_tree_int (tem, prec - max) == 1)
2711 return NULL_TREE;
2713 *mask = wi::lshift (*mask, wi::to_widest (tem));
2714 return exp;
2717 /* Attempt to optimize small range tests using bit test.
2718 E.g.
2719 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2720 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2721 has been optimized by the earlier optimizations into:
2722 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2723 As the whole 43 through 82 range spans fewer than 64 values,
2724 for 64-bit word targets optimize that into:
2725 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
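/* In that mask, bit N corresponds to the value 43 + N: 0x4F covers offsets
0-3 and 6 (values 43-46 and 49) and 0x8F << 32 covers offsets 32-35 and 39
(values 75-78 and 82), so a single shift, AND and comparison test all ten
excluded values at once. */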
2727 static bool
2728 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2729 vec<operand_entry *> *ops,
2730 struct range_entry *ranges)
2732 int i, j;
2733 bool any_changes = false;
2734 int prec = GET_MODE_BITSIZE (word_mode);
2735 auto_vec<struct range_entry *, 64> candidates;
2737 for (i = first; i < length - 2; i++)
2739 tree lowi, highi, lowj, highj, type;
2741 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2742 continue;
2743 type = TREE_TYPE (ranges[i].exp);
2744 if (!INTEGRAL_TYPE_P (type))
2745 continue;
2746 lowi = ranges[i].low;
2747 if (lowi == NULL_TREE)
2748 lowi = TYPE_MIN_VALUE (type);
2749 highi = ranges[i].high;
2750 if (highi == NULL_TREE)
2751 continue;
2752 wide_int mask;
2753 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2754 highi, &mask, &lowi);
2755 if (exp == NULL_TREE)
2756 continue;
2757 bool strict_overflow_p = ranges[i].strict_overflow_p;
2758 candidates.truncate (0);
2759 int end = MIN (i + 64, length);
2760 for (j = i + 1; j < end; j++)
2762 tree exp2;
2763 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2764 continue;
2765 if (ranges[j].exp == exp)
2767 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2769 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2770 if (exp2 == exp)
2772 else if (TREE_CODE (exp2) == PLUS_EXPR)
2774 exp2 = TREE_OPERAND (exp2, 0);
2775 STRIP_NOPS (exp2);
2776 if (exp2 != exp)
2777 continue;
2779 else
2780 continue;
2782 else
2783 continue;
2784 lowj = ranges[j].low;
2785 if (lowj == NULL_TREE)
2786 continue;
2787 highj = ranges[j].high;
2788 if (highj == NULL_TREE)
2789 highj = TYPE_MAX_VALUE (type);
2790 wide_int mask2;
2791 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2792 highj, &mask2, NULL);
2793 if (exp2 != exp)
2794 continue;
2795 mask |= mask2;
2796 strict_overflow_p |= ranges[j].strict_overflow_p;
2797 candidates.safe_push (&ranges[j]);
2800 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2801 if (candidates.length () >= 2)
2803 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2804 wi::to_widest (lowi)
2805 + prec - 1 - wi::clz (mask));
2806 operand_entry *oe = (*ops)[ranges[i].idx];
2807 tree op = oe->op;
2808 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2809 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2810 location_t loc = gimple_location (stmt);
2811 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2813 /* See if it isn't cheaper to pretend the minimum value of the
2814 range is 0, if the maximum value is small enough.
2815 We can then avoid the subtraction of the minimum value, but the
2816 mask constant could perhaps be more expensive. */
2817 if (compare_tree_int (lowi, 0) > 0
2818 && compare_tree_int (high, prec) < 0)
2820 int cost_diff;
2821 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2822 rtx reg = gen_raw_REG (word_mode, 10000);
2823 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2824 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2825 GEN_INT (-m)), speed_p);
2826 rtx r = immed_wide_int_const (mask, word_mode);
2827 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2828 word_mode, speed_p);
2829 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2830 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2831 word_mode, speed_p);
2832 if (cost_diff > 0)
2834 mask = wi::lshift (mask, m);
2835 lowi = build_zero_cst (TREE_TYPE (lowi));
2839 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2840 false, lowi, high);
2841 if (tem == NULL_TREE || is_gimple_val (tem))
2842 continue;
2843 tree etype = unsigned_type_for (TREE_TYPE (exp));
2844 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2845 fold_convert_loc (loc, etype, exp),
2846 fold_convert_loc (loc, etype, lowi));
2847 exp = fold_convert_loc (loc, integer_type_node, exp);
2848 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2849 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2850 build_int_cst (word_type, 1), exp);
2851 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2852 wide_int_to_tree (word_type, mask));
2853 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2854 build_zero_cst (word_type));
2855 if (is_gimple_val (exp))
2856 continue;
2858 /* The shift might have undefined behavior if TEM is true,
2859 but reassociate_bb isn't prepared to have basic blocks
2860 split when it is running. So, temporarily emit code
2861 with BIT_IOR_EXPR instead of &&, and fix it up in
2862 branch_fixup. */
2863 gimple_seq seq;
2864 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2865 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2866 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2867 gimple_seq seq2;
2868 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2869 gimple_seq_add_seq_without_update (&seq, seq2);
2870 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2871 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2872 gimple *g = gimple_build_assign (make_ssa_name (optype),
2873 BIT_IOR_EXPR, tem, exp);
2874 gimple_set_location (g, loc);
2875 gimple_seq_add_stmt_without_update (&seq, g);
2876 exp = gimple_assign_lhs (g);
2877 tree val = build_zero_cst (optype);
2878 if (update_range_test (&ranges[i], NULL, candidates.address (),
2879 candidates.length (), opcode, ops, exp,
2880 seq, false, val, val, strict_overflow_p))
2882 any_changes = true;
2883 reassoc_branch_fixups.safe_push (tem);
2885 else
2886 gimple_seq_discard (seq);
2889 return any_changes;
2892 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
2893 and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1. */
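/* These rewrites are valid because a bitwise IOR is nonzero exactly when at
least one operand is nonzero, and a bitwise AND differs from all-ones
exactly when at least one operand does; equivalently, x == 0 && y == 0
&& z == 0 is (x | y | z) == 0 and x == -1 && y == -1 && z == -1 is
(x & y & z) == -1. */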
2895 static bool
2896 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2897 vec<operand_entry *> *ops,
2898 struct range_entry *ranges)
2900 int i;
2901 unsigned int b;
2902 bool any_changes = false;
2903 auto_vec<int, 128> buckets;
2904 auto_vec<int, 32> chains;
2905 auto_vec<struct range_entry *, 32> candidates;
2907 for (i = first; i < length; i++)
2909 if (ranges[i].exp == NULL_TREE
2910 || TREE_CODE (ranges[i].exp) != SSA_NAME
2911 || !ranges[i].in_p
2912 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2913 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2914 || ranges[i].low == NULL_TREE
2915 || ranges[i].low != ranges[i].high)
2916 continue;
2918 bool zero_p = integer_zerop (ranges[i].low);
2919 if (!zero_p && !integer_all_onesp (ranges[i].low))
2920 continue;
2922 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2923 if (buckets.length () <= b)
2924 buckets.safe_grow_cleared (b + 1);
2925 if (chains.length () <= (unsigned) i)
2926 chains.safe_grow (i + 1);
2927 chains[i] = buckets[b];
2928 buckets[b] = i + 1;
2931 FOR_EACH_VEC_ELT (buckets, b, i)
2932 if (i && chains[i - 1])
2934 int j, k = i;
2935 for (j = chains[i - 1]; j; j = chains[j - 1])
2937 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2938 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2939 if (reassoc_stmt_dominates_stmt_p (gk, gj))
2940 k = j;
2942 tree type1 = TREE_TYPE (ranges[k - 1].exp);
2943 tree type2 = NULL_TREE;
2944 bool strict_overflow_p = false;
2945 candidates.truncate (0);
2946 for (j = i; j; j = chains[j - 1])
2948 tree type = TREE_TYPE (ranges[j - 1].exp);
2949 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2950 if (j == k
2951 || useless_type_conversion_p (type1, type))
2953 else if (type2 == NULL_TREE
2954 || useless_type_conversion_p (type2, type))
2956 if (type2 == NULL_TREE)
2957 type2 = type;
2958 candidates.safe_push (&ranges[j - 1]);
2961 unsigned l = candidates.length ();
2962 for (j = i; j; j = chains[j - 1])
2964 tree type = TREE_TYPE (ranges[j - 1].exp);
2965 if (j == k)
2966 continue;
2967 if (useless_type_conversion_p (type1, type))
2969 else if (type2 == NULL_TREE
2970 || useless_type_conversion_p (type2, type))
2971 continue;
2972 candidates.safe_push (&ranges[j - 1]);
2974 gimple_seq seq = NULL;
2975 tree op = NULL_TREE;
2976 unsigned int id;
2977 struct range_entry *r;
2978 candidates.safe_push (&ranges[k - 1]);
2979 FOR_EACH_VEC_ELT (candidates, id, r)
2981 gimple *g;
2982 if (id == 0)
2984 op = r->exp;
2985 continue;
2987 if (id == l)
2989 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
2990 gimple_seq_add_stmt_without_update (&seq, g);
2991 op = gimple_assign_lhs (g);
2993 tree type = TREE_TYPE (r->exp);
2994 tree exp = r->exp;
2995 if (id >= l && !useless_type_conversion_p (type1, type))
2997 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
2998 gimple_seq_add_stmt_without_update (&seq, g);
2999 exp = gimple_assign_lhs (g);
3001 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3002 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3003 op, exp);
3004 gimple_seq_add_stmt_without_update (&seq, g);
3005 op = gimple_assign_lhs (g);
3007 candidates.pop ();
3008 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3009 candidates.length (), opcode, ops, op,
3010 seq, true, ranges[k - 1].low,
3011 ranges[k - 1].low, strict_overflow_p))
3012 any_changes = true;
3013 else
3014 gimple_seq_discard (seq);
3017 return any_changes;
3020 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3021 a >= 0 && a < b into (unsigned) a < (unsigned) b
3022 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
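/* The cast is safe because b >= 0 means (unsigned) b has the sign bit
clear, while a negative a maps to an unsigned value with the sign bit
set and therefore compares greater than (unsigned) b, matching the
failure of the a >= 0 test. */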
3024 static bool
3025 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3026 vec<operand_entry *> *ops,
3027 struct range_entry *ranges)
3029 int i;
3030 bool any_changes = false;
3031 hash_map<tree, int> *map = NULL;
3033 for (i = first; i < length; i++)
3035 if (ranges[i].exp == NULL_TREE
3036 || TREE_CODE (ranges[i].exp) != SSA_NAME
3037 || !ranges[i].in_p)
3038 continue;
3040 tree type = TREE_TYPE (ranges[i].exp);
3041 if (!INTEGRAL_TYPE_P (type)
3042 || TYPE_UNSIGNED (type)
3043 || ranges[i].low == NULL_TREE
3044 || !integer_zerop (ranges[i].low)
3045 || ranges[i].high != NULL_TREE)
3046 continue;
3047 /* EXP >= 0 here. */
3048 if (map == NULL)
3049 map = new hash_map <tree, int>;
3050 map->put (ranges[i].exp, i);
3053 if (map == NULL)
3054 return false;
3056 for (i = 0; i < length; i++)
3058 bool in_p = ranges[i].in_p;
3059 if (ranges[i].low == NULL_TREE
3060 || ranges[i].high == NULL_TREE)
3061 continue;
3062 if (!integer_zerop (ranges[i].low)
3063 || !integer_zerop (ranges[i].high))
3065 if (ranges[i].exp
3066 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3067 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3068 && integer_onep (ranges[i].low)
3069 && integer_onep (ranges[i].high))
3070 in_p = !in_p;
3071 else
3072 continue;
3075 gimple *stmt;
3076 tree_code ccode;
3077 tree rhs1, rhs2;
3078 if (ranges[i].exp)
3080 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3081 continue;
3082 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3083 if (!is_gimple_assign (stmt))
3084 continue;
3085 ccode = gimple_assign_rhs_code (stmt);
3086 rhs1 = gimple_assign_rhs1 (stmt);
3087 rhs2 = gimple_assign_rhs2 (stmt);
3089 else
3091 operand_entry *oe = (*ops)[ranges[i].idx];
3092 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3093 if (gimple_code (stmt) != GIMPLE_COND)
3094 continue;
3095 ccode = gimple_cond_code (stmt);
3096 rhs1 = gimple_cond_lhs (stmt);
3097 rhs2 = gimple_cond_rhs (stmt);
3100 if (TREE_CODE (rhs1) != SSA_NAME
3101 || rhs2 == NULL_TREE
3102 || TREE_CODE (rhs2) != SSA_NAME)
3103 continue;
3105 switch (ccode)
3107 case GT_EXPR:
3108 case GE_EXPR:
3109 case LT_EXPR:
3110 case LE_EXPR:
3111 break;
3112 default:
3113 continue;
3115 if (in_p)
3116 ccode = invert_tree_comparison (ccode, false);
3117 switch (ccode)
3119 case GT_EXPR:
3120 case GE_EXPR:
3121 std::swap (rhs1, rhs2);
3122 ccode = swap_tree_comparison (ccode);
3123 break;
3124 case LT_EXPR:
3125 case LE_EXPR:
3126 break;
3127 default:
3128 gcc_unreachable ();
3131 int *idx = map->get (rhs1);
3132 if (idx == NULL)
3133 continue;
3135 wide_int nz = get_nonzero_bits (rhs2);
3136 if (wi::neg_p (nz))
3137 continue;
3139 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3140 and RHS2 is known to be >= 0. */
3141 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3143 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3144 if ((ranges[*idx].strict_overflow_p
3145 || ranges[i].strict_overflow_p)
3146 && issue_strict_overflow_warning (wc))
3147 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3148 "assuming signed overflow does not occur "
3149 "when simplifying range test");
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3153 struct range_entry *r = &ranges[*idx];
3154 fprintf (dump_file, "Optimizing range test ");
3155 print_generic_expr (dump_file, r->exp);
3156 fprintf (dump_file, " +[");
3157 print_generic_expr (dump_file, r->low);
3158 fprintf (dump_file, ", ");
3159 print_generic_expr (dump_file, r->high);
3160 fprintf (dump_file, "] and comparison ");
3161 print_generic_expr (dump_file, rhs1);
3162 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3163 print_generic_expr (dump_file, rhs2);
3164 fprintf (dump_file, "\n into (");
3165 print_generic_expr (dump_file, utype);
3166 fprintf (dump_file, ") ");
3167 print_generic_expr (dump_file, rhs1);
3168 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3169 print_generic_expr (dump_file, utype);
3170 fprintf (dump_file, ") ");
3171 print_generic_expr (dump_file, rhs2);
3172 fprintf (dump_file, "\n");
3175 operand_entry *oe = (*ops)[ranges[i].idx];
3176 ranges[i].in_p = 0;
3177 if (opcode == BIT_IOR_EXPR
3178 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3180 ranges[i].in_p = 1;
3181 ccode = invert_tree_comparison (ccode, false);
3184 unsigned int uid = gimple_uid (stmt);
3185 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3186 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3187 gimple_set_uid (g, uid);
3188 rhs1 = gimple_assign_lhs (g);
3189 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3190 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3191 gimple_set_uid (g, uid);
3192 rhs2 = gimple_assign_lhs (g);
3193 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3194 if (tree_swap_operands_p (rhs1, rhs2))
3196 std::swap (rhs1, rhs2);
3197 ccode = swap_tree_comparison (ccode);
3199 if (gimple_code (stmt) == GIMPLE_COND)
3201 gcond *c = as_a <gcond *> (stmt);
3202 gimple_cond_set_code (c, ccode);
3203 gimple_cond_set_lhs (c, rhs1);
3204 gimple_cond_set_rhs (c, rhs2);
3205 update_stmt (stmt);
3207 else
3209 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3210 if (!INTEGRAL_TYPE_P (ctype)
3211 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3212 && TYPE_PRECISION (ctype) != 1))
3213 ctype = boolean_type_node;
3214 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3215 gimple_set_uid (g, uid);
3216 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3217 if (oe->op && ctype != TREE_TYPE (oe->op))
3219 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3220 NOP_EXPR, gimple_assign_lhs (g));
3221 gimple_set_uid (g, uid);
3222 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3224 ranges[i].exp = gimple_assign_lhs (g);
3225 oe->op = ranges[i].exp;
3226 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3227 ranges[i].high = ranges[i].low;
3229 ranges[i].strict_overflow_p = false;
3230 oe = (*ops)[ranges[*idx].idx];
3231 /* Now change all the other range test immediate uses, so that
3232 those tests will be optimized away. */
3233 if (opcode == ERROR_MARK)
3235 if (oe->op)
3236 oe->op = build_int_cst (TREE_TYPE (oe->op),
3237 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3238 else
3239 oe->op = (oe->rank == BIT_IOR_EXPR
3240 ? boolean_false_node : boolean_true_node);
3242 else
3243 oe->op = error_mark_node;
3244 ranges[*idx].exp = NULL_TREE;
3245 ranges[*idx].low = NULL_TREE;
3246 ranges[*idx].high = NULL_TREE;
3247 any_changes = true;
3250 delete map;
3251 return any_changes;
3254 /* Optimize range tests, similarly to how fold_range_test optimizes
3255 them on trees. The tree code for the binary
3256 operation between all the operands is OPCODE.
3257 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3258 maybe_optimize_range_tests for inter-bb range optimization.
3259 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3260 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
3261 the actual opcode. */
3263 static bool
3264 optimize_range_tests (enum tree_code opcode,
3265 vec<operand_entry *> *ops)
3267 unsigned int length = ops->length (), i, j, first;
3268 operand_entry *oe;
3269 struct range_entry *ranges;
3270 bool any_changes = false;
3272 if (length == 1)
3273 return false;
3275 ranges = XNEWVEC (struct range_entry, length);
3276 for (i = 0; i < length; i++)
3278 oe = (*ops)[i];
3279 ranges[i].idx = i;
3280 init_range_entry (ranges + i, oe->op,
3281 oe->op
3282 ? NULL
3283 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3284 /* For | invert it now, we will invert it again before emitting
3285 the optimized expression. */
3286 if (opcode == BIT_IOR_EXPR
3287 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3288 ranges[i].in_p = !ranges[i].in_p;
3291 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3292 for (i = 0; i < length; i++)
3293 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3294 break;
3296 /* Try to merge ranges. */
3297 for (first = i; i < length; i++)
3299 tree low = ranges[i].low;
3300 tree high = ranges[i].high;
3301 int in_p = ranges[i].in_p;
3302 bool strict_overflow_p = ranges[i].strict_overflow_p;
3303 int update_fail_count = 0;
3305 for (j = i + 1; j < length; j++)
3307 if (ranges[i].exp != ranges[j].exp)
3308 break;
3309 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3310 ranges[j].in_p, ranges[j].low, ranges[j].high))
3311 break;
3312 strict_overflow_p |= ranges[j].strict_overflow_p;
3315 if (j == i + 1)
3316 continue;
3318 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3319 opcode, ops, ranges[i].exp, NULL, in_p,
3320 low, high, strict_overflow_p))
3322 i = j - 1;
3323 any_changes = true;
3325 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3326 while update_range_test would fail. */
3327 else if (update_fail_count == 64)
3328 i = j - 1;
3329 else
3330 ++update_fail_count;
3333 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3334 ops, ranges);
3336 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3337 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3338 ops, ranges);
3339 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3340 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3341 ops, ranges);
3342 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3343 ops, ranges);
3344 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3345 ranges);
3347 if (any_changes && opcode != ERROR_MARK)
3349 j = 0;
3350 FOR_EACH_VEC_ELT (*ops, i, oe)
3352 if (oe->op == error_mark_node)
3353 continue;
3354 else if (i != j)
3355 (*ops)[j] = oe;
3356 j++;
3358 ops->truncate (j);
3361 XDELETEVEC (ranges);
3362 return any_changes;
3365 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3366 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3367 otherwise the comparison code. */
3369 static tree_code
3370 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3372 if (TREE_CODE (var) != SSA_NAME)
3373 return ERROR_MARK;
3375 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3376 if (stmt == NULL)
3377 return ERROR_MARK;
3379 /* ??? If we start creating more COND_EXPR, we could perform
3380 this same optimization with them. For now, simplify. */
3381 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3382 return ERROR_MARK;
3384 tree cond = gimple_assign_rhs1 (stmt);
3385 tree_code cmp = TREE_CODE (cond);
3386 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3387 return ERROR_MARK;
3389 /* ??? For now, allow only canonical true and false result vectors.
3390 We could expand this to other constants should the need arise,
3391 but at the moment we don't create them. */
3392 tree t = gimple_assign_rhs2 (stmt);
3393 tree f = gimple_assign_rhs3 (stmt);
3394 bool inv;
3395 if (integer_all_onesp (t))
3396 inv = false;
3397 else if (integer_all_onesp (f))
3399 cmp = invert_tree_comparison (cmp, false);
3400 inv = true;
3402 else
3403 return ERROR_MARK;
3404 if (!integer_zerop (f))
3405 return ERROR_MARK;
3407 /* Success! */
3408 if (rets)
3409 *rets = stmt;
3410 if (reti)
3411 *reti = inv;
3412 return cmp;
3415 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3416 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3418 static bool
3419 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3421 unsigned int length = ops->length (), i, j;
3422 bool any_changes = false;
3424 if (length == 1)
3425 return false;
3427 for (i = 0; i < length; ++i)
3429 tree elt0 = (*ops)[i]->op;
3431 gassign *stmt0;
3432 bool invert;
3433 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3434 if (cmp0 == ERROR_MARK)
3435 continue;
3437 for (j = i + 1; j < length; ++j)
3439 tree &elt1 = (*ops)[j]->op;
3441 gassign *stmt1;
3442 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3443 if (cmp1 == ERROR_MARK)
3444 continue;
3446 tree cond0 = gimple_assign_rhs1 (stmt0);
3447 tree x0 = TREE_OPERAND (cond0, 0);
3448 tree y0 = TREE_OPERAND (cond0, 1);
3450 tree cond1 = gimple_assign_rhs1 (stmt1);
3451 tree x1 = TREE_OPERAND (cond1, 0);
3452 tree y1 = TREE_OPERAND (cond1, 1);
3454 tree comb;
3455 if (opcode == BIT_AND_EXPR)
3456 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3457 else if (opcode == BIT_IOR_EXPR)
3458 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3459 else
3460 gcc_unreachable ();
3461 if (comb == NULL)
3462 continue;
3464 /* Success! */
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3467 fprintf (dump_file, "Transforming ");
3468 print_generic_expr (dump_file, cond0);
3469 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3470 print_generic_expr (dump_file, cond1);
3471 fprintf (dump_file, " into ");
3472 print_generic_expr (dump_file, comb);
3473 fputc ('\n', dump_file);
3476 gimple_assign_set_rhs1 (stmt0, comb);
3477 if (invert)
3478 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3479 *gimple_assign_rhs3_ptr (stmt0));
3480 update_stmt (stmt0);
3482 elt1 = error_mark_node;
3483 any_changes = true;
3487 if (any_changes)
3489 operand_entry *oe;
3490 j = 0;
3491 FOR_EACH_VEC_ELT (*ops, i, oe)
3493 if (oe->op == error_mark_node)
3494 continue;
3495 else if (i != j)
3496 (*ops)[j] = oe;
3497 j++;
3499 ops->truncate (j);
3502 return any_changes;
3505 /* Return true if STMT is a cast like:
3506 <bb N>:
3508 _123 = (int) _234;
3510 <bb M>:
3511 # _345 = PHI <_123(N), 1(...), 1(...)>
3512 where _234 has bool type, _123 has a single use and
3513 bb N has a single successor M. This is commonly used in
3514 the last block of a range test.
3516 Also return true if STMT is a tcc_comparison like:
3517 <bb N>:
3519 _234 = a_2(D) == 2;
3521 <bb M>:
3522 # _345 = PHI <_234(N), 1(...), 1(...)>
3523 _346 = (int) _345;
3524 where _234 has bool type, a single use and
3525 bb N has a single successor M. This is commonly used in
3526 the last block of a range test. */
3528 static bool
3529 final_range_test_p (gimple *stmt)
3531 basic_block bb, rhs_bb, lhs_bb;
3532 edge e;
3533 tree lhs, rhs;
3534 use_operand_p use_p;
3535 gimple *use_stmt;
3537 if (!gimple_assign_cast_p (stmt)
3538 && (!is_gimple_assign (stmt)
3539 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3540 != tcc_comparison)))
3541 return false;
3542 bb = gimple_bb (stmt);
3543 if (!single_succ_p (bb))
3544 return false;
3545 e = single_succ_edge (bb);
3546 if (e->flags & EDGE_COMPLEX)
3547 return false;
3549 lhs = gimple_assign_lhs (stmt);
3550 rhs = gimple_assign_rhs1 (stmt);
3551 if (gimple_assign_cast_p (stmt)
3552 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3553 || TREE_CODE (rhs) != SSA_NAME
3554 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3555 return false;
3557 if (!gimple_assign_cast_p (stmt)
3558 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3559 return false;
3561 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3562 if (!single_imm_use (lhs, &use_p, &use_stmt))
3563 return false;
3565 if (gimple_code (use_stmt) != GIMPLE_PHI
3566 || gimple_bb (use_stmt) != e->dest)
3567 return false;
3569 /* And that the rhs is defined in the same loop. */
3570 if (gimple_assign_cast_p (stmt))
3572 if (TREE_CODE (rhs) != SSA_NAME
3573 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3574 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3575 return false;
3577 else
3579 if (TREE_CODE (lhs) != SSA_NAME
3580 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3581 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3582 return false;
3585 return true;
3588 /* Return true if BB is suitable basic block for inter-bb range test
3589 optimization. If BACKWARD is true, BB should be the only predecessor
3590 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3591 or compared with to find a common basic block to which all conditions
3592 branch if true resp. false. If BACKWARD is false, TEST_BB should
3593 be the only predecessor of BB. */
3595 static bool
3596 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3597 bool backward)
3599 edge_iterator ei, ei2;
3600 edge e, e2;
3601 gimple *stmt;
3602 gphi_iterator gsi;
3603 bool other_edge_seen = false;
3604 bool is_cond;
3606 if (test_bb == bb)
3607 return false;
3608 /* Check last stmt first. */
3609 stmt = last_stmt (bb);
3610 if (stmt == NULL
3611 || (gimple_code (stmt) != GIMPLE_COND
3612 && (backward || !final_range_test_p (stmt)))
3613 || gimple_visited_p (stmt)
3614 || stmt_could_throw_p (stmt)
3615 || *other_bb == bb)
3616 return false;
3617 is_cond = gimple_code (stmt) == GIMPLE_COND;
3618 if (is_cond)
3620 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3621 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3622 to *OTHER_BB (if not set yet, try to find it out). */
3623 if (EDGE_COUNT (bb->succs) != 2)
3624 return false;
3625 FOR_EACH_EDGE (e, ei, bb->succs)
3627 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3628 return false;
3629 if (e->dest == test_bb)
3631 if (backward)
3632 continue;
3633 else
3634 return false;
3636 if (e->dest == bb)
3637 return false;
3638 if (*other_bb == NULL)
3640 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3641 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3642 return false;
3643 else if (e->dest == e2->dest)
3644 *other_bb = e->dest;
3645 if (*other_bb == NULL)
3646 return false;
3648 if (e->dest == *other_bb)
3649 other_edge_seen = true;
3650 else if (backward)
3651 return false;
3653 if (*other_bb == NULL || !other_edge_seen)
3654 return false;
3656 else if (single_succ (bb) != *other_bb)
3657 return false;
3659 /* Now check all PHIs of *OTHER_BB. */
3660 e = find_edge (bb, *other_bb);
3661 e2 = find_edge (test_bb, *other_bb);
3662 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3664 gphi *phi = gsi.phi ();
3665 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3666 corresponding to BB and TEST_BB predecessor must be the same. */
3667 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3668 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3670 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3671 one of the PHIs should have the lhs of the last stmt in
3672 that block as PHI arg and that PHI should have 0 or 1
3673 corresponding to it in all other range test basic blocks
3674 considered. */
3675 if (!is_cond)
3677 if (gimple_phi_arg_def (phi, e->dest_idx)
3678 == gimple_assign_lhs (stmt)
3679 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3680 || integer_onep (gimple_phi_arg_def (phi,
3681 e2->dest_idx))))
3682 continue;
3684 else
3686 gimple *test_last = last_stmt (test_bb);
3687 if (gimple_code (test_last) != GIMPLE_COND
3688 && gimple_phi_arg_def (phi, e2->dest_idx)
3689 == gimple_assign_lhs (test_last)
3690 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3691 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3692 continue;
3695 return false;
3698 return true;
3701 /* Return true if BB doesn't have side-effects that would disallow
3702 range test optimization, all SSA_NAMEs set in the bb are consumed
3703 in the bb and there are no PHIs. */
3705 static bool
3706 no_side_effect_bb (basic_block bb)
3708 gimple_stmt_iterator gsi;
3709 gimple *last;
3711 if (!gimple_seq_empty_p (phi_nodes (bb)))
3712 return false;
3713 last = last_stmt (bb);
3714 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3716 gimple *stmt = gsi_stmt (gsi);
3717 tree lhs;
3718 imm_use_iterator imm_iter;
3719 use_operand_p use_p;
3721 if (is_gimple_debug (stmt))
3722 continue;
3723 if (gimple_has_side_effects (stmt))
3724 return false;
3725 if (stmt == last)
3726 return true;
3727 if (!is_gimple_assign (stmt))
3728 return false;
3729 lhs = gimple_assign_lhs (stmt);
3730 if (TREE_CODE (lhs) != SSA_NAME)
3731 return false;
3732 if (gimple_assign_rhs_could_trap_p (stmt))
3733 return false;
3734 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3736 gimple *use_stmt = USE_STMT (use_p);
3737 if (is_gimple_debug (use_stmt))
3738 continue;
3739 if (gimple_bb (use_stmt) != bb)
3740 return false;
3743 return false;
3746 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3747 return true and fill in *OPS recursively. */
3749 static bool
3750 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3751 struct loop *loop)
3753 gimple *stmt = SSA_NAME_DEF_STMT (var);
3754 tree rhs[2];
3755 int i;
3757 if (!is_reassociable_op (stmt, code, loop))
3758 return false;
3760 rhs[0] = gimple_assign_rhs1 (stmt);
3761 rhs[1] = gimple_assign_rhs2 (stmt);
3762 gimple_set_visited (stmt, true);
3763 for (i = 0; i < 2; i++)
3764 if (TREE_CODE (rhs[i]) == SSA_NAME
3765 && !get_ops (rhs[i], code, ops, loop)
3766 && has_single_use (rhs[i]))
3768 operand_entry *oe = operand_entry_pool.allocate ();
3770 oe->op = rhs[i];
3771 oe->rank = code;
3772 oe->id = 0;
3773 oe->count = 1;
3774 oe->stmt_to_insert = NULL;
3775 ops->safe_push (oe);
3777 return true;
3780 /* Find the ops that were added by get_ops starting from VAR, see if
3781 they were changed during update_range_test and if yes, create new
3782 stmts. */
3784 static tree
3785 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3786 unsigned int *pidx, struct loop *loop)
3788 gimple *stmt = SSA_NAME_DEF_STMT (var);
3789 tree rhs[4];
3790 int i;
3792 if (!is_reassociable_op (stmt, code, loop))
3793 return NULL;
3795 rhs[0] = gimple_assign_rhs1 (stmt);
3796 rhs[1] = gimple_assign_rhs2 (stmt);
3797 rhs[2] = rhs[0];
3798 rhs[3] = rhs[1];
3799 for (i = 0; i < 2; i++)
3800 if (TREE_CODE (rhs[i]) == SSA_NAME)
3802 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3803 if (rhs[2 + i] == NULL_TREE)
3805 if (has_single_use (rhs[i]))
3806 rhs[2 + i] = ops[(*pidx)++]->op;
3807 else
3808 rhs[2 + i] = rhs[i];
3811 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3812 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3814 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3815 var = make_ssa_name (TREE_TYPE (var));
3816 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3817 rhs[2], rhs[3]);
3818 gimple_set_uid (g, gimple_uid (stmt));
3819 gimple_set_visited (g, true);
3820 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3822 return var;
3825 /* Structure to track the initial value passed to get_ops and
3826 the range in the ops vector for each basic block. */
3828 struct inter_bb_range_test_entry
3830 tree op;
3831 unsigned int first_idx, last_idx;
3834 /* Inter-bb range test optimization.
3836 Returns TRUE if a gimple conditional is optimized to a true/false,
3837 otherwise returns FALSE.
3839 This indicates to the caller that it should run a CFG cleanup pass
3840 once reassociation is completed. */
3842 static bool
3843 maybe_optimize_range_tests (gimple *stmt)
3845 basic_block first_bb = gimple_bb (stmt);
3846 basic_block last_bb = first_bb;
3847 basic_block other_bb = NULL;
3848 basic_block bb;
3849 edge_iterator ei;
3850 edge e;
3851 auto_vec<operand_entry *> ops;
3852 auto_vec<inter_bb_range_test_entry> bbinfo;
3853 bool any_changes = false;
3854 bool cfg_cleanup_needed = false;
3856 /* Consider only basic blocks that end with GIMPLE_COND or
3857 a cast statement satisfying final_range_test_p. All
3858 but the last bb in the first_bb .. last_bb range
3859 should end with GIMPLE_COND. */
3860 if (gimple_code (stmt) == GIMPLE_COND)
3862 if (EDGE_COUNT (first_bb->succs) != 2)
3863 return cfg_cleanup_needed;
3865 else if (final_range_test_p (stmt))
3866 other_bb = single_succ (first_bb);
3867 else
3868 return cfg_cleanup_needed;
3870 if (stmt_could_throw_p (stmt))
3871 return cfg_cleanup_needed;
3873 /* As relative ordering of post-dominator sons isn't fixed,
3874 maybe_optimize_range_tests can be called first on any
3875 bb in the range we want to optimize. So, start searching
3876 backwards, if first_bb can be set to a predecessor. */
3877 while (single_pred_p (first_bb))
3879 basic_block pred_bb = single_pred (first_bb);
3880 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3881 break;
3882 if (!no_side_effect_bb (first_bb))
3883 break;
3884 first_bb = pred_bb;
3886 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3887 Before starting forward search in last_bb successors, find
3888 out the other_bb. */
3889 if (first_bb == last_bb)
3891 other_bb = NULL;
3892 /* As non-GIMPLE_COND last stmt always terminates the range,
3893 if forward search didn't discover anything, just give up. */
3894 if (gimple_code (stmt) != GIMPLE_COND)
3895 return cfg_cleanup_needed;
3896 /* Look at both successors. Either it ends with a GIMPLE_COND
3897 and satisfies suitable_cond_bb, or ends with a cast and
3898 other_bb is that cast's successor. */
3899 FOR_EACH_EDGE (e, ei, first_bb->succs)
3900 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3901 || e->dest == first_bb)
3902 return cfg_cleanup_needed;
3903 else if (single_pred_p (e->dest))
3905 stmt = last_stmt (e->dest);
3906 if (stmt
3907 && gimple_code (stmt) == GIMPLE_COND
3908 && EDGE_COUNT (e->dest->succs) == 2)
3910 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3911 break;
3912 else
3913 other_bb = NULL;
3915 else if (stmt
3916 && final_range_test_p (stmt)
3917 && find_edge (first_bb, single_succ (e->dest)))
3919 other_bb = single_succ (e->dest);
3920 if (other_bb == first_bb)
3921 other_bb = NULL;
3924 if (other_bb == NULL)
3925 return cfg_cleanup_needed;
3927 /* Now do the forward search, moving last_bb to successor bbs
3928 that aren't other_bb. */
3929 while (EDGE_COUNT (last_bb->succs) == 2)
3931 FOR_EACH_EDGE (e, ei, last_bb->succs)
3932 if (e->dest != other_bb)
3933 break;
3934 if (e == NULL)
3935 break;
3936 if (!single_pred_p (e->dest))
3937 break;
3938 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3939 break;
3940 if (!no_side_effect_bb (e->dest))
3941 break;
3942 last_bb = e->dest;
3944 if (first_bb == last_bb)
3945 return cfg_cleanup_needed;
3946 /* Here basic blocks first_bb through last_bb's predecessor
3947 end with GIMPLE_COND, all of them have one of the edges to
3948 other_bb and another to another block in the range,
3949 all blocks except first_bb don't have side-effects and
3950 last_bb ends with either GIMPLE_COND, or cast satisfying
3951 final_range_test_p. */
3952 for (bb = last_bb; ; bb = single_pred (bb))
3954 enum tree_code code;
3955 tree lhs, rhs;
3956 inter_bb_range_test_entry bb_ent;
3958 bb_ent.op = NULL_TREE;
3959 bb_ent.first_idx = ops.length ();
3960 bb_ent.last_idx = bb_ent.first_idx;
3961 e = find_edge (bb, other_bb);
3962 stmt = last_stmt (bb);
3963 gimple_set_visited (stmt, true);
3964 if (gimple_code (stmt) != GIMPLE_COND)
3966 use_operand_p use_p;
3967 gimple *phi;
3968 edge e2;
3969 unsigned int d;
3971 lhs = gimple_assign_lhs (stmt);
3972 rhs = gimple_assign_rhs1 (stmt);
3973 gcc_assert (bb == last_bb);
3975 /* stmt is
3976 _123 = (int) _234;
3977 or
3978 _234 = a_2(D) == 2;
3980 followed by:
3981 <bb M>:
3982 # _345 = PHI <_123(N), 1(...), 1(...)>
3984 or 0 instead of 1. If it is 0, the _234
3985 range test is anded together with all the
3986 other range tests, if it is 1, it is ored with
3987 them. */
3988 single_imm_use (lhs, &use_p, &phi);
3989 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3990 e2 = find_edge (first_bb, other_bb);
3991 d = e2->dest_idx;
3992 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3993 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3994 code = BIT_AND_EXPR;
3995 else
3997 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3998 code = BIT_IOR_EXPR;
4001 /* If _234 SSA_NAME_DEF_STMT is
4002 _234 = _567 | _789;
4003 (or &, corresponding to 1/0 in the phi arguments),
4004 push into ops the individual range test arguments
4005 of the bitwise or (resp. and), recursively. */
4006 if (TREE_CODE (rhs) == SSA_NAME
4007 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4008 != tcc_comparison)
4009 && !get_ops (rhs, code, &ops,
4010 loop_containing_stmt (stmt))
4011 && has_single_use (rhs))
4013 /* Otherwise, push the _234 range test itself. */
4014 operand_entry *oe = operand_entry_pool.allocate ();
4016 oe->op = rhs;
4017 oe->rank = code;
4018 oe->id = 0;
4019 oe->count = 1;
4020 oe->stmt_to_insert = NULL;
4021 ops.safe_push (oe);
4022 bb_ent.last_idx++;
4023 bb_ent.op = rhs;
4025 else if (is_gimple_assign (stmt)
4026 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4027 == tcc_comparison)
4028 && !get_ops (lhs, code, &ops,
4029 loop_containing_stmt (stmt))
4030 && has_single_use (lhs))
4032 operand_entry *oe = operand_entry_pool.allocate ();
4033 oe->op = lhs;
4034 oe->rank = code;
4035 oe->id = 0;
4036 oe->count = 1;
4037 ops.safe_push (oe);
4038 bb_ent.last_idx++;
4039 bb_ent.op = lhs;
4041 else
4043 bb_ent.last_idx = ops.length ();
4044 bb_ent.op = rhs;
4046 bbinfo.safe_push (bb_ent);
4047 continue;
4049 /* Otherwise stmt is GIMPLE_COND. */
4050 code = gimple_cond_code (stmt);
4051 lhs = gimple_cond_lhs (stmt);
4052 rhs = gimple_cond_rhs (stmt);
4053 if (TREE_CODE (lhs) == SSA_NAME
4054 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4055 && ((code != EQ_EXPR && code != NE_EXPR)
4056 || rhs != boolean_false_node
4057 /* Either push into ops the individual bitwise
4058 or resp. and operands, depending on which
4059 edge is other_bb. */
4060 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4061 ^ (code == EQ_EXPR))
4062 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4063 loop_containing_stmt (stmt))))
4065 /* Or push the GIMPLE_COND stmt itself. */
4066 operand_entry *oe = operand_entry_pool.allocate ();
4068 oe->op = NULL;
4069 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4070 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4071 /* oe->op = NULL signals that there is no SSA_NAME
4072 for the range test, and oe->id instead is the
4073 index of the basic block whose final statement
4074 is the GIMPLE_COND. */
4075 oe->id = bb->index;
4076 oe->count = 1;
4077 oe->stmt_to_insert = NULL;
4078 ops.safe_push (oe);
4079 bb_ent.op = NULL;
4080 bb_ent.last_idx++;
4082 else if (ops.length () > bb_ent.first_idx)
4084 bb_ent.op = lhs;
4085 bb_ent.last_idx = ops.length ();
4087 bbinfo.safe_push (bb_ent);
4088 if (bb == first_bb)
4089 break;
4091 if (ops.length () > 1)
4092 any_changes = optimize_range_tests (ERROR_MARK, &ops);
4093 if (any_changes)
4095 unsigned int idx, max_idx = 0;
4096 /* update_ops relies on has_single_use predicates returning the
4097 same values as they did during get_ops earlier. Additionally it
4098 never removes statements, only adds new ones, and it should walk
4099 from the single imm use and check the predicate already before
4100 making those changes.
4101 On the other side, the handling of GIMPLE_COND directly can turn
4102 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4103 it needs to be done in a separate loop afterwards. */
4104 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4106 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4107 && bbinfo[idx].op != NULL_TREE)
4109 tree new_op;
4111 max_idx = idx;
4112 stmt = last_stmt (bb);
4113 new_op = update_ops (bbinfo[idx].op,
4114 (enum tree_code)
4115 ops[bbinfo[idx].first_idx]->rank,
4116 ops, &bbinfo[idx].first_idx,
4117 loop_containing_stmt (stmt));
4118 if (new_op == NULL_TREE)
4120 gcc_assert (bb == last_bb);
4121 new_op = ops[bbinfo[idx].first_idx++]->op;
4123 if (bbinfo[idx].op != new_op)
4125 imm_use_iterator iter;
4126 use_operand_p use_p;
4127 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4129 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4130 if (is_gimple_debug (use_stmt))
4131 continue;
4132 else if (gimple_code (use_stmt) == GIMPLE_COND
4133 || gimple_code (use_stmt) == GIMPLE_PHI)
4134 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4135 SET_USE (use_p, new_op);
4136 else if ((is_gimple_assign (use_stmt)
4137 && (TREE_CODE_CLASS
4138 (gimple_assign_rhs_code (use_stmt))
4139 == tcc_comparison)))
4140 cast_or_tcc_cmp_stmt = use_stmt;
4141 else if (gimple_assign_cast_p (use_stmt))
4142 cast_or_tcc_cmp_stmt = use_stmt;
4143 else
4144 gcc_unreachable ();
4146 if (cast_or_tcc_cmp_stmt)
4148 gcc_assert (bb == last_bb);
4149 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4150 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4151 enum tree_code rhs_code
4152 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4153 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4154 : CONVERT_EXPR;
4155 gassign *g;
4156 if (is_gimple_min_invariant (new_op))
4158 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4159 g = gimple_build_assign (new_lhs, new_op);
4161 else
4162 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4163 gimple_stmt_iterator gsi
4164 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4165 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4166 gimple_set_visited (g, true);
4167 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4168 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4169 if (is_gimple_debug (use_stmt))
4170 continue;
4171 else if (gimple_code (use_stmt) == GIMPLE_COND
4172 || gimple_code (use_stmt) == GIMPLE_PHI)
4173 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4174 SET_USE (use_p, new_lhs);
4175 else
4176 gcc_unreachable ();
4180 if (bb == first_bb)
4181 break;
4183 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4185 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4186 && bbinfo[idx].op == NULL_TREE
4187 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4189 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4191 if (idx > max_idx)
4192 max_idx = idx;
4194 /* If we collapse the conditional to a true/false
4195 condition, then bubble that knowledge up to our caller. */
4196 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4198 gimple_cond_make_false (cond_stmt);
4199 cfg_cleanup_needed = true;
4201 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4203 gimple_cond_make_true (cond_stmt);
4204 cfg_cleanup_needed = true;
4206 else
4208 gimple_cond_set_code (cond_stmt, NE_EXPR);
4209 gimple_cond_set_lhs (cond_stmt,
4210 ops[bbinfo[idx].first_idx]->op);
4211 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4213 update_stmt (cond_stmt);
4215 if (bb == first_bb)
4216 break;
4219 /* The above changes could result in basic blocks after the first
4220 modified one, up to and including last_bb, being executed even if
4221 they would not have been in the original program. If the value
4222 ranges of assignment lhs' in those bbs depended on the conditions
4223 guarding those basic blocks, which can now change, the VRs might
4224 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4225 are only used within the same bb, it should not be a big deal if
4226 we just reset all the VRs in those bbs. See PR68671. */
4227 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4228 reset_flow_sensitive_info_in_bb (bb);
4230 return cfg_cleanup_needed;
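/* Illustrative sketch (not taken from the sources): the CFG shape this
   function collects range tests from looks roughly like

     <bb 2>:  if (x_1 < 10) goto <other_bb>; else goto <bb 3>;
     <bb 3>:  if (x_1 > 20) goto <other_bb>; else goto <bb 4>;

   Both conditions feed the same other_bb, so their operands are pushed
   into OPS and handed to optimize_range_tests; afterwards one of the
   GIMPLE_CONDs is typically rewritten to test the merged range and the
   other is turned into a constant-true/false condition, which is why
   the caller may need a CFG cleanup.  */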
4233 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4234 of STMT in its operands. This is also known as a "destructive
4235 update" operation. */
4237 static bool
4238 is_phi_for_stmt (gimple *stmt, tree operand)
4240 gimple *def_stmt;
4241 gphi *def_phi;
4242 tree lhs;
4243 use_operand_p arg_p;
4244 ssa_op_iter i;
4246 if (TREE_CODE (operand) != SSA_NAME)
4247 return false;
4249 lhs = gimple_assign_lhs (stmt);
4251 def_stmt = SSA_NAME_DEF_STMT (operand);
4252 def_phi = dyn_cast <gphi *> (def_stmt);
4253 if (!def_phi)
4254 return false;
4256 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4257 if (lhs == USE_FROM_PTR (arg_p))
4258 return true;
4259 return false;
4262 /* Remove def stmt of VAR if VAR has zero uses and recurse
4263 on rhs1 operand if so. */
4265 static void
4266 remove_visited_stmt_chain (tree var)
4268 gimple *stmt;
4269 gimple_stmt_iterator gsi;
4271 while (1)
4273 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4274 return;
4275 stmt = SSA_NAME_DEF_STMT (var);
4276 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4278 var = gimple_assign_rhs1 (stmt);
4279 gsi = gsi_for_stmt (stmt);
4280 reassoc_remove_stmt (&gsi);
4281 release_defs (stmt);
4283 else
4284 return;
4288 /* This function checks three consecutive operands in
4289 the passed operands vector OPS starting from OPINDEX and
4290 swaps two operands if it is profitable for the binary operation
4291 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4293 We pair ops with the same rank if possible.
4295 The alternative we try is to see if STMT is a destructive
4296 update style statement, which is like:
4297 b = phi (a, ...)
4298 a = c + b;
4299 In that case, we want to use the destructive update form to
4300 expose the possible vectorizer sum reduction opportunity.
4301 In that case, the third operand will be the phi node. This
4302 check is not performed if STMT is null.
4304 We could, of course, try to be better as noted above, and do a
4305 lot of work to try to find these opportunities in >3 operand
4306 cases, but it is unlikely to be worth it. */
4308 static void
4309 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4310 unsigned int opindex, gimple *stmt)
4312 operand_entry *oe1, *oe2, *oe3;
4314 oe1 = ops[opindex];
4315 oe2 = ops[opindex + 1];
4316 oe3 = ops[opindex + 2];
4318 if ((oe1->rank == oe2->rank
4319 && oe2->rank != oe3->rank)
4320 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4321 && !is_phi_for_stmt (stmt, oe1->op)
4322 && !is_phi_for_stmt (stmt, oe2->op)))
4323 std::swap (*oe1, *oe3);
4324 else if ((oe1->rank == oe3->rank
4325 && oe2->rank != oe3->rank)
4326 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4327 && !is_phi_for_stmt (stmt, oe1->op)
4328 && !is_phi_for_stmt (stmt, oe3->op)))
4329 std::swap (*oe1, *oe2);
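/* Illustrative example (hypothetical ranks): for ops = { a (rank 3),
   b (rank 3), c (rank 5) } starting at OPINDEX, oe1 and oe2 share a rank
   that oe3 does not, so a and c are swapped and the vector becomes
   { c, b, a }.  The binary operation built from the last two entries then
   pairs the equally ranked a and b, which maximizes the chance that the
   redundancy eliminator can reuse an existing a + b.  */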
4332 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4333 two definitions, otherwise return STMT. */
4335 static inline gimple *
4336 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4338 if (TREE_CODE (rhs1) == SSA_NAME
4339 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4340 stmt = SSA_NAME_DEF_STMT (rhs1);
4341 if (TREE_CODE (rhs2) == SSA_NAME
4342 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4343 stmt = SSA_NAME_DEF_STMT (rhs2);
4344 return stmt;
4347 /* If the stmt that defines operand has to be inserted, insert it
4348 before the use. */
4349 static void
4350 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4352 gcc_assert (is_gimple_assign (stmt_to_insert));
4353 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4354 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4355 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4356 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4357 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4359 /* If the insert point is not stmt, then insert_point would be
4360 the point where operand rhs1 or rhs2 is defined. In this case,
4361 stmt_to_insert has to be inserted afterwards. This would
4362 only happen when the stmt insertion point is flexible. */
4363 if (stmt == insert_point)
4364 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4365 else
4366 insert_stmt_after (stmt_to_insert, insert_point);
4370 /* Recursively rewrite our linearized statements so that the operators
4371 match those in OPS[OPINDEX], putting the computation in rank
4372 order. Return new lhs.
4373 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4374 the current stmt and during recursive invocations.
4375 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4376 recursive invocations. */
4378 static tree
4379 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4380 vec<operand_entry *> ops, bool changed, bool next_changed)
4382 tree rhs1 = gimple_assign_rhs1 (stmt);
4383 tree rhs2 = gimple_assign_rhs2 (stmt);
4384 tree lhs = gimple_assign_lhs (stmt);
4385 operand_entry *oe;
4387 /* The final recursion case for this function is that you have
4388 exactly two operations left.
4389 If we had exactly one op in the entire list to start with, we
4390 would have never called this function, and the tail recursion
4391 rewrites them one at a time. */
4392 if (opindex + 2 == ops.length ())
4394 operand_entry *oe1, *oe2;
4396 oe1 = ops[opindex];
4397 oe2 = ops[opindex + 1];
4399 if (rhs1 != oe1->op || rhs2 != oe2->op)
4401 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4402 unsigned int uid = gimple_uid (stmt);
4404 if (dump_file && (dump_flags & TDF_DETAILS))
4406 fprintf (dump_file, "Transforming ");
4407 print_gimple_stmt (dump_file, stmt, 0);
4410 /* If the stmt that defines operand has to be inserted, insert it
4411 before the use. */
4412 if (oe1->stmt_to_insert)
4413 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4414 if (oe2->stmt_to_insert)
4415 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4416 /* Even when changed is false, reassociation could have e.g. removed
4417 some redundant operations, so unless we are just swapping the
4418 arguments or unless there is no change at all (then we just
4419 return lhs), force creation of a new SSA_NAME. */
4420 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4422 gimple *insert_point
4423 = find_insert_point (stmt, oe1->op, oe2->op);
4424 lhs = make_ssa_name (TREE_TYPE (lhs));
4425 stmt
4426 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4427 oe1->op, oe2->op);
4428 gimple_set_uid (stmt, uid);
4429 gimple_set_visited (stmt, true);
4430 if (insert_point == gsi_stmt (gsi))
4431 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4432 else
4433 insert_stmt_after (stmt, insert_point);
4435 else
4437 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4438 == stmt);
4439 gimple_assign_set_rhs1 (stmt, oe1->op);
4440 gimple_assign_set_rhs2 (stmt, oe2->op);
4441 update_stmt (stmt);
4444 if (rhs1 != oe1->op && rhs1 != oe2->op)
4445 remove_visited_stmt_chain (rhs1);
4447 if (dump_file && (dump_flags & TDF_DETAILS))
4449 fprintf (dump_file, " into ");
4450 print_gimple_stmt (dump_file, stmt, 0);
4453 return lhs;
4456 /* If we hit here, we should have 3 or more ops left. */
4457 gcc_assert (opindex + 2 < ops.length ());
4459 /* Rewrite the next operator. */
4460 oe = ops[opindex];
4462 /* If the stmt that defines operand has to be inserted, insert it
4463 before the use. */
4464 if (oe->stmt_to_insert)
4465 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4467 /* Recurse on the LHS of the binary operator, which is guaranteed to
4468 be the non-leaf side. */
4469 tree new_rhs1
4470 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4471 changed || oe->op != rhs2 || next_changed,
4472 false);
4474 if (oe->op != rhs2 || new_rhs1 != rhs1)
4476 if (dump_file && (dump_flags & TDF_DETAILS))
4478 fprintf (dump_file, "Transforming ");
4479 print_gimple_stmt (dump_file, stmt, 0);
4482 /* If changed is false, this is either opindex == 0
4483 or all outer rhs2's were equal to corresponding oe->op,
4484 and powi_result is NULL.
4485 That means lhs is equivalent before and after reassociation.
4486 Otherwise ensure the old lhs SSA_NAME is not reused and
4487 create a new stmt as well, so that any debug stmts will be
4488 properly adjusted. */
4489 if (changed)
4491 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4492 unsigned int uid = gimple_uid (stmt);
4493 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4495 lhs = make_ssa_name (TREE_TYPE (lhs));
4496 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4497 new_rhs1, oe->op);
4498 gimple_set_uid (stmt, uid);
4499 gimple_set_visited (stmt, true);
4500 if (insert_point == gsi_stmt (gsi))
4501 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4502 else
4503 insert_stmt_after (stmt, insert_point);
4505 else
4507 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4508 == stmt);
4509 gimple_assign_set_rhs1 (stmt, new_rhs1);
4510 gimple_assign_set_rhs2 (stmt, oe->op);
4511 update_stmt (stmt);
4514 if (dump_file && (dump_flags & TDF_DETAILS))
4516 fprintf (dump_file, " into ");
4517 print_gimple_stmt (dump_file, stmt, 0);
4520 return lhs;
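/* Illustrative result (hypothetical SSA names, ignoring the operand-swap
   heuristic applied to the last three entries): for a rank-sorted OPS of
   { a, b, c, d } (highest rank first) and a linearized chain of three
   statements, the rewrite produces

     _t1 = c + d;
     _t2 = _t1 + b;
     lhs = _t2 + a;

   so the lowest-ranked operands (typically constants or loop invariants)
   are combined deepest in the chain.  */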
4523 /* Find out how many cycles we need to compute statements chain.
4524 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
4525 maximum number of independent statements we may execute per cycle. */
4527 static int
4528 get_required_cycles (int ops_num, int cpu_width)
4530 int res;
4531 int elog;
4532 unsigned int rest;
4534 /* While we have more than 2 * cpu_width operands
4535 we may reduce the number of operands by cpu_width
4536 per cycle. */
4537 res = ops_num / (2 * cpu_width);
4539 /* The remaining operand count may be halved each cycle
4540 until we are left with a single operand. */
4541 rest = (unsigned)(ops_num - res * cpu_width);
4542 elog = exact_log2 (rest);
4543 if (elog >= 0)
4544 res += elog;
4545 else
4546 res += floor_log2 (rest) + 1;
4548 return res;
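/* Worked example (numbers chosen for illustration): for ops_num = 10 and
   cpu_width = 2 we get res = 10 / (2 * 2) = 2 cycles in which the operand
   count drops by cpu_width each, leaving rest = 10 - 2 * 2 = 6 operands;
   6 is not a power of two, so exact_log2 fails and the tail costs
   floor_log2 (6) + 1 = 3 more cycles, i.e. 5 cycles in total.  The tail
   term is simply ceil (log2 (rest)).  */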
4551 /* Returns an optimal number of registers to use for computation of
4552 given statements. */
4554 static int
4555 get_reassociation_width (int ops_num, enum tree_code opc,
4556 machine_mode mode)
4558 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4559 int width;
4560 int width_min;
4561 int cycles_best;
4563 if (param_width > 0)
4564 width = param_width;
4565 else
4566 width = targetm.sched.reassociation_width (opc, mode);
4568 if (width == 1)
4569 return width;
4571 /* Get the minimal time required for sequence computation. */
4572 cycles_best = get_required_cycles (ops_num, width);
4574 /* Check if we may use less width and still compute the sequence in
4575 the same number of cycles. This allows us to reduce register usage.
4576 get_required_cycles increases monotonically as the width decreases,
4577 so we can perform a binary search for the minimal width that still
4578 results in the optimal cycle count. */
4579 width_min = 1;
4580 while (width > width_min)
4582 int width_mid = (width + width_min) / 2;
4584 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4585 width = width_mid;
4586 else if (width_min < width_mid)
4587 width_min = width_mid;
4588 else
4589 break;
4592 return width;
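/* Worked example (assumed target width of 4): with ops_num = 4 the best
   cycle count is get_required_cycles (4, 4) = 2.  The binary search then
   probes width_mid = 2, which also needs only 2 cycles, so width shrinks
   to 2; probing width_mid = 1 needs 3 cycles, so the search stops and 2
   is returned, halving the number of live partial sums at no cost.  */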
4595 /* Recursively rewrite our linearized statements so that the operators
4596 match those in OPS[OPINDEX], putting the computation in rank
4597 order and trying to allow operations to be executed in
4598 parallel. */
4600 static void
4601 rewrite_expr_tree_parallel (gassign *stmt, int width,
4602 vec<operand_entry *> ops)
4604 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4605 int op_num = ops.length ();
4606 gcc_assert (op_num > 0);
4607 int stmt_num = op_num - 1;
4608 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4609 int op_index = op_num - 1;
4610 int stmt_index = 0;
4611 int ready_stmts_end = 0;
4612 int i = 0;
4613 gimple *stmt1 = NULL, *stmt2 = NULL;
4614 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4616 /* We start expression rewriting from the top statements.
4617 So, in this loop we create a full list of statements
4618 we will work with. */
4619 stmts[stmt_num - 1] = stmt;
4620 for (i = stmt_num - 2; i >= 0; i--)
4621 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4623 for (i = 0; i < stmt_num; i++)
4625 tree op1, op2;
4627 /* Determine whether we should use results of
4628 already handled statements or not. */
4629 if (ready_stmts_end == 0
4630 && (i - stmt_index >= width || op_index < 1))
4631 ready_stmts_end = i;
4633 /* Now we choose operands for the next statement. A non-zero
4634 value in ready_stmts_end means that we should use the
4635 results of already generated statements as new operands. */
4636 if (ready_stmts_end > 0)
4638 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4639 if (ready_stmts_end > stmt_index)
4640 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4641 else if (op_index >= 0)
4643 operand_entry *oe = ops[op_index--];
4644 stmt2 = oe->stmt_to_insert;
4645 op2 = oe->op;
4647 else
4649 gcc_assert (stmt_index < i);
4650 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4653 if (stmt_index >= ready_stmts_end)
4654 ready_stmts_end = 0;
4656 else
4658 if (op_index > 1)
4659 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4660 operand_entry *oe2 = ops[op_index--];
4661 operand_entry *oe1 = ops[op_index--];
4662 op2 = oe2->op;
4663 stmt2 = oe2->stmt_to_insert;
4664 op1 = oe1->op;
4665 stmt1 = oe1->stmt_to_insert;
4668 /* If we emit the last statement then we should put
4669 operands into the last statement. It will also
4670 break the loop. */
4671 if (op_index < 0 && stmt_index == i)
4672 i = stmt_num - 1;
4674 if (dump_file && (dump_flags & TDF_DETAILS))
4676 fprintf (dump_file, "Transforming ");
4677 print_gimple_stmt (dump_file, stmts[i], 0);
4680 /* If the stmt that defines operand has to be inserted, insert it
4681 before the use. */
4682 if (stmt1)
4683 insert_stmt_before_use (stmts[i], stmt1);
4684 if (stmt2)
4685 insert_stmt_before_use (stmts[i], stmt2);
4686 stmt1 = stmt2 = NULL;
4688 /* We keep the original statement only for the last one. All
4689 others are recreated. */
4690 if (i == stmt_num - 1)
4692 gimple_assign_set_rhs1 (stmts[i], op1);
4693 gimple_assign_set_rhs2 (stmts[i], op2);
4694 update_stmt (stmts[i]);
4696 else
4698 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4700 if (dump_file && (dump_flags & TDF_DETAILS))
4702 fprintf (dump_file, " into ");
4703 print_gimple_stmt (dump_file, stmts[i], 0);
4707 remove_visited_stmt_chain (last_rhs1);
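/* Illustrative result (hypothetical SSA names): with width 2 and four
   operands a, b, c, d in OPS this produces something like

     _t1 = c + d;
     _t2 = a + b;
     lhs = _t1 + _t2;

   instead of the serial chain ((a + b) + c) + d, so the two partial sums
   can be computed in parallel on a machine with two suitable units.  */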
4710 /* Transform STMT, which is really (A + B) + (C + D), into the left
4711 linear form, ((A + B) + C) + D.
4712 Recurse on D if necessary. */
4714 static void
4715 linearize_expr (gimple *stmt)
4717 gimple_stmt_iterator gsi;
4718 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4719 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4720 gimple *oldbinrhs = binrhs;
4721 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4722 gimple *newbinrhs = NULL;
4723 struct loop *loop = loop_containing_stmt (stmt);
4724 tree lhs = gimple_assign_lhs (stmt);
4726 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4727 && is_reassociable_op (binrhs, rhscode, loop));
4729 gsi = gsi_for_stmt (stmt);
4731 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4732 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4733 gimple_assign_rhs_code (binrhs),
4734 gimple_assign_lhs (binlhs),
4735 gimple_assign_rhs2 (binrhs));
4736 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4737 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4738 gimple_set_uid (binrhs, gimple_uid (stmt));
4740 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4741 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4743 if (dump_file && (dump_flags & TDF_DETAILS))
4745 fprintf (dump_file, "Linearized: ");
4746 print_gimple_stmt (dump_file, stmt, 0);
4749 reassociate_stats.linearized++;
4750 update_stmt (stmt);
4752 gsi = gsi_for_stmt (oldbinrhs);
4753 reassoc_remove_stmt (&gsi);
4754 release_defs (oldbinrhs);
4756 gimple_set_visited (stmt, true);
4757 gimple_set_visited (binlhs, true);
4758 gimple_set_visited (binrhs, true);
4760 /* Tail recurse on the new rhs if it still needs reassociation. */
4761 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4762 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4763 want to change the algorithm while converting to tuples. */
4764 linearize_expr (stmt);
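/* Illustrative before/after (hypothetical SSA names), ignoring the order
   of the commutative operands within the chain:

     _5 = a + b;                    _5 = a + b;
     _6 = c + d;          ==>       _7 = _5 + d;
     lhs = _5 + _6;                 lhs = _7 + c;

   i.e. the nested right-hand operand is split so the statements form a
   single left-linear spine that linearize_expr_tree can walk.  */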
4767 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4768 it. Otherwise, return NULL. */
4770 static gimple *
4771 get_single_immediate_use (tree lhs)
4773 use_operand_p immuse;
4774 gimple *immusestmt;
4776 if (TREE_CODE (lhs) == SSA_NAME
4777 && single_imm_use (lhs, &immuse, &immusestmt)
4778 && is_gimple_assign (immusestmt))
4779 return immusestmt;
4781 return NULL;
4784 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4785 representing the negated value. Insertions of any necessary
4786 instructions go before GSI.
4787 This function is recursive in that, if you hand it "a_5" as the
4788 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4789 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4791 static tree
4792 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4794 gimple *negatedefstmt = NULL;
4795 tree resultofnegate;
4796 gimple_stmt_iterator gsi;
4797 unsigned int uid;
4799 /* If we are trying to negate a name, defined by an add, negate the
4800 add operands instead. */
4801 if (TREE_CODE (tonegate) == SSA_NAME)
4802 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4803 if (TREE_CODE (tonegate) == SSA_NAME
4804 && is_gimple_assign (negatedefstmt)
4805 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4806 && has_single_use (gimple_assign_lhs (negatedefstmt))
4807 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4809 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4810 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4811 tree lhs = gimple_assign_lhs (negatedefstmt);
4812 gimple *g;
4814 gsi = gsi_for_stmt (negatedefstmt);
4815 rhs1 = negate_value (rhs1, &gsi);
4817 gsi = gsi_for_stmt (negatedefstmt);
4818 rhs2 = negate_value (rhs2, &gsi);
4820 gsi = gsi_for_stmt (negatedefstmt);
4821 lhs = make_ssa_name (TREE_TYPE (lhs));
4822 gimple_set_visited (negatedefstmt, true);
4823 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4824 gimple_set_uid (g, gimple_uid (negatedefstmt));
4825 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4826 return lhs;
4829 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4830 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4831 NULL_TREE, true, GSI_SAME_STMT);
4832 gsi = *gsip;
4833 uid = gimple_uid (gsi_stmt (gsi));
4834 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4836 gimple *stmt = gsi_stmt (gsi);
4837 if (gimple_uid (stmt) != 0)
4838 break;
4839 gimple_set_uid (stmt, uid);
4841 return resultofnegate;
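/* Illustrative before/after (hypothetical SSA names): negating a_5 where

     a_5 = b_3 + b_4;

   and a_5 has a single use yields new statements

     _10 = -b_3;
     _11 = -b_4;
     a_12 = _10 + _11;

   and a_12 is returned; the old definition of a_5 is left for the
   visited-statement cleanup to remove once it becomes dead.  */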
4844 /* Return true if we should break up the subtract in STMT into an add
4845 with negate. This is true when the subtract's operands are really
4846 adds, or the subtract itself is used in an add expression. In
4847 either case, breaking up the subtract into an add with negate
4848 exposes the adds to reassociation. */
4850 static bool
4851 should_break_up_subtract (gimple *stmt)
4853 tree lhs = gimple_assign_lhs (stmt);
4854 tree binlhs = gimple_assign_rhs1 (stmt);
4855 tree binrhs = gimple_assign_rhs2 (stmt);
4856 gimple *immusestmt;
4857 struct loop *loop = loop_containing_stmt (stmt);
4859 if (TREE_CODE (binlhs) == SSA_NAME
4860 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4861 return true;
4863 if (TREE_CODE (binrhs) == SSA_NAME
4864 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4865 return true;
4867 if (TREE_CODE (lhs) == SSA_NAME
4868 && (immusestmt = get_single_immediate_use (lhs))
4869 && is_gimple_assign (immusestmt)
4870 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4871 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4872 && gimple_assign_rhs1 (immusestmt) == lhs)
4873 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4874 return true;
4875 return false;
4878 /* Transform STMT from A - B into A + -B. */
4880 static void
4881 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4883 tree rhs1 = gimple_assign_rhs1 (stmt);
4884 tree rhs2 = gimple_assign_rhs2 (stmt);
4886 if (dump_file && (dump_flags & TDF_DETAILS))
4888 fprintf (dump_file, "Breaking up subtract ");
4889 print_gimple_stmt (dump_file, stmt, 0);
4892 rhs2 = negate_value (rhs2, gsip);
4893 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4894 update_stmt (stmt);
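/* Illustrative before/after (hypothetical SSA names):

     x_3 = a_1 - b_2;    ==>    _5 = -b_2;
                                x_3 = a_1 + _5;

   exposing the addition to the rest of the reassociation machinery.  */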
4897 /* Determine whether STMT is a builtin call that raises an SSA name
4898 to an integer power and has only one use. If so, and this is early
4899 reassociation and unsafe math optimizations are permitted, place
4900 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4901 If any of these conditions does not hold, return FALSE. */
4903 static bool
4904 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4906 tree arg1;
4907 REAL_VALUE_TYPE c, cint;
4909 switch (gimple_call_combined_fn (stmt))
4911 CASE_CFN_POW:
4912 if (flag_errno_math)
4913 return false;
4915 *base = gimple_call_arg (stmt, 0);
4916 arg1 = gimple_call_arg (stmt, 1);
4918 if (TREE_CODE (arg1) != REAL_CST)
4919 return false;
4921 c = TREE_REAL_CST (arg1);
4923 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4924 return false;
4926 *exponent = real_to_integer (&c);
4927 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4928 if (!real_identical (&c, &cint))
4929 return false;
4931 break;
4933 CASE_CFN_POWI:
4934 *base = gimple_call_arg (stmt, 0);
4935 arg1 = gimple_call_arg (stmt, 1);
4937 if (!tree_fits_shwi_p (arg1))
4938 return false;
4940 *exponent = tree_to_shwi (arg1);
4941 break;
4943 default:
4944 return false;
4947 /* Expanding negative exponents is generally unproductive, so we don't
4948 complicate matters with those. Exponents of zero and one should
4949 have been handled by expression folding. */
4950 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4951 return false;
4953 return true;
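/* Illustrative examples (assumed calls, with errno-setting math disabled):
   __builtin_pow (x_1, 4.0) is accepted with *BASE = x_1 and *EXPONENT = 4,
   and __builtin_powi (x_1, 3) with *EXPONENT = 3, while pow (x_1, 0.5) is
   rejected because 0.5 is not an exact integer and pow (x_1, 1.0) is
   rejected because exponents below 2 are left to ordinary folding.  */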
4956 /* Try to derive and add operand entry for OP to *OPS. Return false if
4957 unsuccessful. */
4959 static bool
4960 try_special_add_to_ops (vec<operand_entry *> *ops,
4961 enum tree_code code,
4962 tree op, gimple* def_stmt)
4964 tree base = NULL_TREE;
4965 HOST_WIDE_INT exponent = 0;
4967 if (TREE_CODE (op) != SSA_NAME
4968 || ! has_single_use (op))
4969 return false;
4971 if (code == MULT_EXPR
4972 && reassoc_insert_powi_p
4973 && flag_unsafe_math_optimizations
4974 && is_gimple_call (def_stmt)
4975 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4977 add_repeat_to_ops_vec (ops, base, exponent);
4978 gimple_set_visited (def_stmt, true);
4979 return true;
4981 else if (code == MULT_EXPR
4982 && is_gimple_assign (def_stmt)
4983 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4984 && !HONOR_SNANS (TREE_TYPE (op))
4985 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4986 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4988 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4989 tree cst = build_minus_one_cst (TREE_TYPE (op));
4990 add_to_ops_vec (ops, rhs1);
4991 add_to_ops_vec (ops, cst);
4992 gimple_set_visited (def_stmt, true);
4993 return true;
4996 return false;
4999 /* Recursively linearize a binary expression that is the RHS of STMT.
5000 Place the operands of the expression tree in the vector named OPS. */
5002 static void
5003 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5004 bool is_associative, bool set_visited)
5006 tree binlhs = gimple_assign_rhs1 (stmt);
5007 tree binrhs = gimple_assign_rhs2 (stmt);
5008 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5009 bool binlhsisreassoc = false;
5010 bool binrhsisreassoc = false;
5011 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5012 struct loop *loop = loop_containing_stmt (stmt);
5014 if (set_visited)
5015 gimple_set_visited (stmt, true);
5017 if (TREE_CODE (binlhs) == SSA_NAME)
5019 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5020 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5021 && !stmt_could_throw_p (binlhsdef));
5024 if (TREE_CODE (binrhs) == SSA_NAME)
5026 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5027 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5028 && !stmt_could_throw_p (binrhsdef));
5031 /* If the LHS is not reassociable, but the RHS is, we need to swap
5032 them. If neither is reassociable, there is nothing we can do, so
5033 just put them in the ops vector. If the LHS is reassociable,
5034 linearize it. If both are reassociable, then linearize the RHS
5035 and the LHS. */
5037 if (!binlhsisreassoc)
5039 /* If this is not an associative operation, like division, give up. */
5040 if (!is_associative)
5042 add_to_ops_vec (ops, binrhs);
5043 return;
5046 if (!binrhsisreassoc)
5048 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5049 add_to_ops_vec (ops, binrhs);
5051 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5052 add_to_ops_vec (ops, binlhs);
5054 return;
5057 if (dump_file && (dump_flags & TDF_DETAILS))
5059 fprintf (dump_file, "swapping operands of ");
5060 print_gimple_stmt (dump_file, stmt, 0);
5063 swap_ssa_operands (stmt,
5064 gimple_assign_rhs1_ptr (stmt),
5065 gimple_assign_rhs2_ptr (stmt));
5066 update_stmt (stmt);
5068 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, " is now ");
5071 print_gimple_stmt (dump_file, stmt, 0);
5074 /* We want to make it so the lhs is always the reassociative op,
5075 so swap. */
5076 std::swap (binlhs, binrhs);
5078 else if (binrhsisreassoc)
5080 linearize_expr (stmt);
5081 binlhs = gimple_assign_rhs1 (stmt);
5082 binrhs = gimple_assign_rhs2 (stmt);
5085 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5086 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5087 rhscode, loop));
5088 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5089 is_associative, set_visited);
5091 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5092 add_to_ops_vec (ops, binrhs);
5095 /* Repropagate the negates back into subtracts, since no other pass
5096 currently does it. */
5098 static void
5099 repropagate_negates (void)
5101 unsigned int i = 0;
5102 tree negate;
5104 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5106 gimple *user = get_single_immediate_use (negate);
5108 if (!user || !is_gimple_assign (user))
5109 continue;
5111 /* The negate operand can be either operand of a PLUS_EXPR
5112 (it can be the LHS if the RHS is a constant for example).
5114 Force the negate operand to the RHS of the PLUS_EXPR, then
5115 transform the PLUS_EXPR into a MINUS_EXPR. */
5116 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5118 /* If the negated operand appears on the LHS of the
5119 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5120 to force the negated operand to the RHS of the PLUS_EXPR. */
5121 if (gimple_assign_rhs1 (user) == negate)
5123 swap_ssa_operands (user,
5124 gimple_assign_rhs1_ptr (user),
5125 gimple_assign_rhs2_ptr (user));
5128 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5129 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5130 if (gimple_assign_rhs2 (user) == negate)
5132 tree rhs1 = gimple_assign_rhs1 (user);
5133 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5134 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5135 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5136 update_stmt (user);
5139 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5141 if (gimple_assign_rhs1 (user) == negate)
5143 /* We have
5144 x = -a
5145 y = x - b
5146 which we transform into
5147 x = a + b
5148 y = -x .
5149 This pushes down the negate which we possibly can merge
5150 into some other operation, hence insert it into the
5151 plus_negates vector. */
5152 gimple *feed = SSA_NAME_DEF_STMT (negate);
5153 tree a = gimple_assign_rhs1 (feed);
5154 tree b = gimple_assign_rhs2 (user);
5155 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5156 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5157 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5158 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5159 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5160 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5161 user = gsi_stmt (gsi2);
5162 update_stmt (user);
5163 reassoc_remove_stmt (&gsi);
5164 release_defs (feed);
5165 plus_negates.safe_push (gimple_assign_lhs (user));
5167 else
5169 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5170 rid of one operation. */
5171 gimple *feed = SSA_NAME_DEF_STMT (negate);
5172 tree a = gimple_assign_rhs1 (feed);
5173 tree rhs1 = gimple_assign_rhs1 (user);
5174 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5175 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5176 update_stmt (gsi_stmt (gsi));
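/* Illustrative example for the PLUS_EXPR case above (hypothetical SSA
   names): given

     x_2 = -a_1;
     y_3 = b_4 + x_2;

   the negate is forced to the right operand (already there here) and the
   addition is rewritten to

     y_3 = b_4 - a_1;

   so the explicit negation can become dead.  */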
5182 /* Returns true if OP is of a type for which we can do reassociation.
5183 That is for integral or non-saturating fixed-point types, and for
5184 floating-point types when associative-math is enabled. */
5186 static bool
5187 can_reassociate_p (tree op)
5189 tree type = TREE_TYPE (op);
5190 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5191 return false;
5192 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5193 || NON_SAT_FIXED_POINT_TYPE_P (type)
5194 || (flag_associative_math && FLOAT_TYPE_P (type)))
5195 return true;
5196 return false;
5199 /* Break up subtract operations in block BB.
5201 We do this top down because we don't know whether the subtract is
5202 part of a possible chain of reassociation except at the top.
5204 IE given
5205 d = f + g
5206 c = a + e
5207 b = c - d
5208 q = b - r
5209 k = t - q
5211 we want to break up k = t - q, but we won't until we've transformed q
5212 = b - r, which won't be broken up until we transform b = c - d.
5214 En passant, clear the GIMPLE visited flag on every statement
5215 and set UIDs within each basic block. */
5217 static void
5218 break_up_subtract_bb (basic_block bb)
5220 gimple_stmt_iterator gsi;
5221 basic_block son;
5222 unsigned int uid = 1;
5224 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5226 gimple *stmt = gsi_stmt (gsi);
5227 gimple_set_visited (stmt, false);
5228 gimple_set_uid (stmt, uid++);
5230 if (!is_gimple_assign (stmt)
5231 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5232 continue;
5234 /* Look for simple gimple subtract operations. */
5235 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5237 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5238 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5239 continue;
5241 /* Check for a subtract used only in an addition. If this
5242 is the case, transform it into add of a negate for better
5243 reassociation. IE transform C = A-B into C = A + -B if C
5244 is only used in an addition. */
5245 if (should_break_up_subtract (stmt))
5246 break_up_subtract (stmt, &gsi);
5248 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5249 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5250 plus_negates.safe_push (gimple_assign_lhs (stmt));
5252 for (son = first_dom_son (CDI_DOMINATORS, bb);
5253 son;
5254 son = next_dom_son (CDI_DOMINATORS, son))
5255 break_up_subtract_bb (son);
5258 /* Used for repeated factor analysis. */
5259 struct repeat_factor
5261 /* An SSA name that occurs in a multiply chain. */
5262 tree factor;
5264 /* Cached rank of the factor. */
5265 unsigned rank;
5267 /* Number of occurrences of the factor in the chain. */
5268 HOST_WIDE_INT count;
5270 /* An SSA name representing the product of this factor and
5271 all factors appearing later in the repeated factor vector. */
5272 tree repr;
5276 static vec<repeat_factor> repeat_factor_vec;
5278 /* Used for sorting the repeat factor vector. Sort primarily by
5279 ascending occurrence count, secondarily by descending rank. */
5281 static int
5282 compare_repeat_factors (const void *x1, const void *x2)
5284 const repeat_factor *rf1 = (const repeat_factor *) x1;
5285 const repeat_factor *rf2 = (const repeat_factor *) x2;
5287 if (rf1->count != rf2->count)
5288 return rf1->count - rf2->count;
5290 return rf2->rank - rf1->rank;
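/* Illustrative ordering (made-up counts and ranks): factors
   { x: count 2, rank 5 }, { y: count 3, rank 2 }, { z: count 3, rank 7 }
   sort to x, z, y -- x first because of its lower count, and z before y
   because equal counts are broken by descending rank.  */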
5293 /* Look for repeated operands in OPS in the multiply tree rooted at
5294 STMT. Replace them with an optimal sequence of multiplies and powi
5295 builtin calls, and remove the used operands from OPS. Return an
5296 SSA name representing the value of the replacement sequence. */
5298 static tree
5299 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5301 unsigned i, j, vec_len;
5302 int ii;
5303 operand_entry *oe;
5304 repeat_factor *rf1, *rf2;
5305 repeat_factor rfnew;
5306 tree result = NULL_TREE;
5307 tree target_ssa, iter_result;
5308 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5309 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5310 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5311 gimple *mul_stmt, *pow_stmt;
5313 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5314 target. */
5315 if (!powi_fndecl)
5316 return NULL_TREE;
5318 /* Allocate the repeated factor vector. */
5319 repeat_factor_vec.create (10);
5321 /* Scan the OPS vector for all SSA names in the product and build
5322 up a vector of occurrence counts for each factor. */
5323 FOR_EACH_VEC_ELT (*ops, i, oe)
5325 if (TREE_CODE (oe->op) == SSA_NAME)
5327 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5329 if (rf1->factor == oe->op)
5331 rf1->count += oe->count;
5332 break;
5336 if (j >= repeat_factor_vec.length ())
5338 rfnew.factor = oe->op;
5339 rfnew.rank = oe->rank;
5340 rfnew.count = oe->count;
5341 rfnew.repr = NULL_TREE;
5342 repeat_factor_vec.safe_push (rfnew);
5347 /* Sort the repeated factor vector by (a) increasing occurrence count,
5348 and (b) decreasing rank. */
5349 repeat_factor_vec.qsort (compare_repeat_factors);
5351 /* It is generally best to combine as many base factors as possible
5352 into a product before applying __builtin_powi to the result.
5353 However, the sort order chosen for the repeated factor vector
5354 allows us to cache partial results for the product of the base
5355 factors for subsequent use. When we already have a cached partial
5356 result from a previous iteration, it is best to make use of it
5357 before looking for another __builtin_pow opportunity.
5359 As an example, consider x * x * y * y * y * z * z * z * z.
5360 We want to first compose the product x * y * z, raise it to the
5361 second power, then multiply this by y * z, and finally multiply
5362 by z. This can be done in 5 multiplies provided we cache y * z
5363 for use in both expressions:
5365 t1 = y * z
5366 t2 = t1 * x
5367 t3 = t2 * t2
5368 t4 = t1 * t3
5369 result = t4 * z
5371 If we instead ignored the cached y * z and first multiplied by
5372 the __builtin_pow opportunity z * z, we would get the inferior:
5374 t1 = y * z
5375 t2 = t1 * x
5376 t3 = t2 * t2
5377 t4 = z * z
5378 t5 = t3 * t4
5379 result = t5 * y */
5381 vec_len = repeat_factor_vec.length ();
5383 /* Repeatedly look for opportunities to create a builtin_powi call. */
5384 while (true)
5386 HOST_WIDE_INT power;
5388 /* First look for the largest cached product of factors from
5389 preceding iterations. If found, create a builtin_powi for
5390 it if the minimum occurrence count for its factors is at
5391 least 2, or just use this cached product as our next
5392 multiplicand if the minimum occurrence count is 1. */
5393 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5395 if (rf1->repr && rf1->count > 0)
5396 break;
5399 if (j < vec_len)
5401 power = rf1->count;
5403 if (power == 1)
5405 iter_result = rf1->repr;
5407 if (dump_file && (dump_flags & TDF_DETAILS))
5409 unsigned elt;
5410 repeat_factor *rf;
5411 fputs ("Multiplying by cached product ", dump_file);
5412 for (elt = j; elt < vec_len; elt++)
5414 rf = &repeat_factor_vec[elt];
5415 print_generic_expr (dump_file, rf->factor);
5416 if (elt < vec_len - 1)
5417 fputs (" * ", dump_file);
5419 fputs ("\n", dump_file);
5422 else
5424 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5425 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5426 build_int_cst (integer_type_node,
5427 power));
5428 gimple_call_set_lhs (pow_stmt, iter_result);
5429 gimple_set_location (pow_stmt, gimple_location (stmt));
5430 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5431 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5433 if (dump_file && (dump_flags & TDF_DETAILS))
5435 unsigned elt;
5436 repeat_factor *rf;
5437 fputs ("Building __builtin_pow call for cached product (",
5438 dump_file);
5439 for (elt = j; elt < vec_len; elt++)
5441 rf = &repeat_factor_vec[elt];
5442 print_generic_expr (dump_file, rf->factor);
5443 if (elt < vec_len - 1)
5444 fputs (" * ", dump_file);
5446 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5447 power);
5451 else
5453 /* Otherwise, find the first factor in the repeated factor
5454 vector whose occurrence count is at least 2. If no such
5455 factor exists, there are no builtin_powi opportunities
5456 remaining. */
5457 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5459 if (rf1->count >= 2)
5460 break;
5463 if (j >= vec_len)
5464 break;
5466 power = rf1->count;
5468 if (dump_file && (dump_flags & TDF_DETAILS))
5470 unsigned elt;
5471 repeat_factor *rf;
5472 fputs ("Building __builtin_pow call for (", dump_file);
5473 for (elt = j; elt < vec_len; elt++)
5475 rf = &repeat_factor_vec[elt];
5476 print_generic_expr (dump_file, rf->factor);
5477 if (elt < vec_len - 1)
5478 fputs (" * ", dump_file);
5480 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5483 reassociate_stats.pows_created++;
5485 /* Visit each element of the vector in reverse order (so that
5486 high-occurrence elements are visited first, and within the
5487 same occurrence count, lower-ranked elements are visited
5488 first). Form a linear product of all elements in this order
5489 whose occurrence count is at least that of element J.
5490 Record the SSA name representing the product of each element
5491 with all subsequent elements in the vector. */
5492 if (j == vec_len - 1)
5493 rf1->repr = rf1->factor;
5494 else
5496 for (ii = vec_len - 2; ii >= (int)j; ii--)
5498 tree op1, op2;
5500 rf1 = &repeat_factor_vec[ii];
5501 rf2 = &repeat_factor_vec[ii + 1];
5503 /* Init the last factor's representative to be itself. */
5504 if (!rf2->repr)
5505 rf2->repr = rf2->factor;
5507 op1 = rf1->factor;
5508 op2 = rf2->repr;
5510 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5511 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5512 op1, op2);
5513 gimple_set_location (mul_stmt, gimple_location (stmt));
5514 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5515 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5516 rf1->repr = target_ssa;
5518 /* Don't reprocess the multiply we just introduced. */
5519 gimple_set_visited (mul_stmt, true);
5523 /* Form a call to __builtin_powi for the maximum product
5524 just formed, raised to the power obtained earlier. */
5525 rf1 = &repeat_factor_vec[j];
5526 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5527 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5528 build_int_cst (integer_type_node,
5529 power));
5530 gimple_call_set_lhs (pow_stmt, iter_result);
5531 gimple_set_location (pow_stmt, gimple_location (stmt));
5532 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5533 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5536 /* If we previously formed at least one other builtin_powi call,
5537 form the product of this one and those others. */
5538 if (result)
5540 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5541 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5542 result, iter_result);
5543 gimple_set_location (mul_stmt, gimple_location (stmt));
5544 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5545 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5546 gimple_set_visited (mul_stmt, true);
5547 result = new_result;
5549 else
5550 result = iter_result;
5552 /* Decrement the occurrence count of each element in the product
5553 by the count found above, and remove this many copies of each
5554 factor from OPS. */
5555 for (i = j; i < vec_len; i++)
5557 unsigned k = power;
5558 unsigned n;
5560 rf1 = &repeat_factor_vec[i];
5561 rf1->count -= power;
5563 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5565 if (oe->op == rf1->factor)
5567 if (oe->count <= k)
5569 ops->ordered_remove (n);
5570 k -= oe->count;
5572 if (k == 0)
5573 break;
5575 else
5577 oe->count -= k;
5578 break;
5585 /* At this point all elements in the repeated factor vector have a
5586 remaining occurrence count of 0 or 1, and those with a count of 1
5587 don't have cached representatives. Re-sort the ops vector and
5588 clean up. */
5589 ops->qsort (sort_by_operand_rank);
5590 repeat_factor_vec.release ();
5592 /* Return the final product computed herein. Note that there may
5593 still be some elements with single occurrence count left in OPS;
5594 those will be handled by the normal reassociation logic. */
5595 return result;
5598 /* Attempt to optimize
5599 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5600 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5602 static void
5603 attempt_builtin_copysign (vec<operand_entry *> *ops)
5605 operand_entry *oe;
5606 unsigned int i;
5607 unsigned int length = ops->length ();
5608 tree cst = ops->last ()->op;
5610 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5611 return;
5613 FOR_EACH_VEC_ELT (*ops, i, oe)
5615 if (TREE_CODE (oe->op) == SSA_NAME
5616 && has_single_use (oe->op))
5618 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5619 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5621 tree arg0, arg1;
5622 switch (gimple_call_combined_fn (old_call))
5624 CASE_CFN_COPYSIGN:
5625 arg0 = gimple_call_arg (old_call, 0);
5626 arg1 = gimple_call_arg (old_call, 1);
5627 /* The first argument of copysign must be a constant,
5628 otherwise there's nothing to do. */
5629 if (TREE_CODE (arg0) == REAL_CST)
5631 tree type = TREE_TYPE (arg0);
5632 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5633 /* If we couldn't fold to a single constant, skip it.
5634 That happens e.g. for inexact multiplication when
5635 -frounding-math. */
5636 if (mul == NULL_TREE)
5637 break;
5638 /* Instead of adjusting OLD_CALL, let's build a new
5639 call to not leak the LHS and prevent keeping bogus
5640 debug statements. DCE will clean up the old call. */
5641 gcall *new_call;
5642 if (gimple_call_internal_p (old_call))
5643 new_call = gimple_build_call_internal
5644 (IFN_COPYSIGN, 2, mul, arg1);
5645 else
5646 new_call = gimple_build_call
5647 (gimple_call_fndecl (old_call), 2, mul, arg1);
5648 tree lhs = make_ssa_name (type);
5649 gimple_call_set_lhs (new_call, lhs);
5650 gimple_set_location (new_call,
5651 gimple_location (old_call));
5652 insert_stmt_after (new_call, old_call);
5653 /* We've used the constant, get rid of it. */
5654 ops->pop ();
5655 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5656 /* Handle the CST1 < 0 case by negating the result. */
5657 if (cst1_neg)
5659 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5660 gimple *negate_stmt
5661 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5662 insert_stmt_after (negate_stmt, new_call);
5663 oe->op = negrhs;
5665 else
5666 oe->op = lhs;
5667 if (dump_file && (dump_flags & TDF_DETAILS))
5669 fprintf (dump_file, "Optimizing copysign: ");
5670 print_generic_expr (dump_file, cst);
5671 fprintf (dump_file, " * COPYSIGN (");
5672 print_generic_expr (dump_file, arg0);
5673 fprintf (dump_file, ", ");
5674 print_generic_expr (dump_file, arg1);
5675 fprintf (dump_file, ") into %sCOPYSIGN (",
5676 cst1_neg ? "-" : "");
5677 print_generic_expr (dump_file, mul);
5678 fprintf (dump_file, ", ");
5679 print_generic_expr (dump_file, arg1);
5680 fprintf (dump_file, ")\n");
5682 return;
5684 break;
5685 default:
5686 break;
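/* Illustrative before/after (hypothetical SSA names), for a positive CST1:

     _1 = __builtin_copysign (3.0, y_2);
     ... 2.0 * _1 * ...
   ==>
     _5 = __builtin_copysign (6.0, y_2);

   with _5 replacing _1 in the multiplication chain and the constant 2.0
   popped from OPS; a negative CST1 additionally negates the new result.  */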
5693 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5695 static void
5696 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5698 tree rhs1;
5700 if (dump_file && (dump_flags & TDF_DETAILS))
5702 fprintf (dump_file, "Transforming ");
5703 print_gimple_stmt (dump_file, stmt, 0);
5706 rhs1 = gimple_assign_rhs1 (stmt);
5707 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5708 update_stmt (stmt);
5709 remove_visited_stmt_chain (rhs1);
5711 if (dump_file && (dump_flags & TDF_DETAILS))
5713 fprintf (dump_file, " into ");
5714 print_gimple_stmt (dump_file, stmt, 0);
5718 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5720 static void
5721 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5722 tree rhs1, tree rhs2)
5724 if (dump_file && (dump_flags & TDF_DETAILS))
5726 fprintf (dump_file, "Transforming ");
5727 print_gimple_stmt (dump_file, stmt, 0);
5730 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5731 update_stmt (gsi_stmt (*gsi));
5732 remove_visited_stmt_chain (rhs1);
5734 if (dump_file && (dump_flags & TDF_DETAILS))
5736 fprintf (dump_file, " into ");
5737 print_gimple_stmt (dump_file, stmt, 0);
5741 /* Reassociate expressions in basic block BB and recurse on its
5742 post-dominator children.
5744 Bubble up the return status from maybe_optimize_range_tests. */
5746 static bool
5747 reassociate_bb (basic_block bb)
5749 gimple_stmt_iterator gsi;
5750 basic_block son;
5751 gimple *stmt = last_stmt (bb);
5752 bool cfg_cleanup_needed = false;
5754 if (stmt && !gimple_visited_p (stmt))
5755 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5757 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5759 stmt = gsi_stmt (gsi);
5761 if (is_gimple_assign (stmt)
5762 && !stmt_could_throw_p (stmt))
5764 tree lhs, rhs1, rhs2;
5765 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5767 /* If this is not a gimple binary expression, there is
5768 nothing for us to do with it. */
5769 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5770 continue;
5772 /* If this was part of an already processed statement,
5773 we don't need to touch it again. */
5774 if (gimple_visited_p (stmt))
5776 /* This statement might have become dead because of previous
5777 reassociations. */
5778 if (has_zero_uses (gimple_get_lhs (stmt)))
5780 reassoc_remove_stmt (&gsi);
5781 release_defs (stmt);
5782 /* We might end up removing the last stmt above, which
5783 places the iterator at the end of the sequence.
5784 Reset it to the last stmt in that case; it might
5785 still be the end of the sequence if we removed the
5786 last statement of the sequence, in which case
5787 we need to bail out. */
5788 if (gsi_end_p (gsi))
5790 gsi = gsi_last_bb (bb);
5791 if (gsi_end_p (gsi))
5792 break;
5795 continue;
5798 lhs = gimple_assign_lhs (stmt);
5799 rhs1 = gimple_assign_rhs1 (stmt);
5800 rhs2 = gimple_assign_rhs2 (stmt);
5802 /* For non-bit or min/max operations we can't associate
5803 all types. Verify that here. */
5804 if (rhs_code != BIT_IOR_EXPR
5805 && rhs_code != BIT_AND_EXPR
5806 && rhs_code != BIT_XOR_EXPR
5807 && rhs_code != MIN_EXPR
5808 && rhs_code != MAX_EXPR
5809 && (!can_reassociate_p (lhs)
5810 || !can_reassociate_p (rhs1)
5811 || !can_reassociate_p (rhs2)))
5812 continue;
5814 if (associative_tree_code (rhs_code))
5816 auto_vec<operand_entry *> ops;
5817 tree powi_result = NULL_TREE;
5818 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5820 /* There may be no immediate uses left by the time we
5821 get here because we may have eliminated them all. */
5822 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5823 continue;
5825 gimple_set_visited (stmt, true);
5826 linearize_expr_tree (&ops, stmt, true, true);
5827 ops.qsort (sort_by_operand_rank);
5828 int orig_len = ops.length ();
5829 optimize_ops_list (rhs_code, &ops);
5830 if (undistribute_ops_list (rhs_code, &ops,
5831 loop_containing_stmt (stmt)))
5833 ops.qsort (sort_by_operand_rank);
5834 optimize_ops_list (rhs_code, &ops);
5837 if (rhs_code == PLUS_EXPR
5838 && transform_add_to_multiply (&ops))
5839 ops.qsort (sort_by_operand_rank);
5841 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5843 if (is_vector)
5844 optimize_vec_cond_expr (rhs_code, &ops);
5845 else
5846 optimize_range_tests (rhs_code, &ops);
5849 if (rhs_code == MULT_EXPR && !is_vector)
5851 attempt_builtin_copysign (&ops);
5853 if (reassoc_insert_powi_p
5854 && flag_unsafe_math_optimizations)
5855 powi_result = attempt_builtin_powi (stmt, &ops);
5858 operand_entry *last;
5859 bool negate_result = false;
5860 if (ops.length () > 1
5861 && rhs_code == MULT_EXPR)
5863 last = ops.last ();
5864 if ((integer_minus_onep (last->op)
5865 || real_minus_onep (last->op))
5866 && !HONOR_SNANS (TREE_TYPE (lhs))
5867 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5868 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5870 ops.pop ();
5871 negate_result = true;
5875 tree new_lhs = lhs;
5876 /* If the operand vector is now empty, all operands were
5877 consumed by the __builtin_powi optimization. */
5878 if (ops.length () == 0)
5879 transform_stmt_to_copy (&gsi, stmt, powi_result);
5880 else if (ops.length () == 1)
5882 tree last_op = ops.last ()->op;
5884 /* If the stmt that defines the operand has to be inserted, insert it
5885 before the use. */
5886 if (ops.last ()->stmt_to_insert)
5887 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5888 if (powi_result)
5889 transform_stmt_to_multiply (&gsi, stmt, last_op,
5890 powi_result);
5891 else
5892 transform_stmt_to_copy (&gsi, stmt, last_op);
5894 else
5896 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5897 int ops_num = ops.length ();
5898 int width = get_reassociation_width (ops_num, rhs_code, mode);
5900 if (dump_file && (dump_flags & TDF_DETAILS))
5901 fprintf (dump_file,
5902 "Width = %d was chosen for reassociation\n", width);
5905 /* For binary bit operations, if there are at least 3
5906 operands and the last operand in OPS is a constant,
5907 move it to the front. This helps ensure that we generate
5908 (X & Y) & C rather than (X & C) & Y. The former will
5909 often match a canonical bit test when we get to RTL. */
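/* For instance (a hypothetical sketch): if OPS ends up as
   { x_1, y_2, 5 } for a BIT_AND_EXPR, swapping the constant to the
   front makes the rewrite below emit something like
   _t = x_1 & y_2;
   lhs = _t & 5;
   rather than pairing the constant with one of the variables first,
   i.e. the (X & Y) & C shape described above. */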
5910 if (ops.length () != 2
5911 && (rhs_code == BIT_AND_EXPR
5912 || rhs_code == BIT_IOR_EXPR
5913 || rhs_code == BIT_XOR_EXPR)
5914 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
5915 std::swap (*ops[0], *ops[ops_num - 1]);
5917 if (width > 1
5918 && ops.length () > 3)
5919 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5920 width, ops);
5921 else
5923 /* When there are three operands left, we want
5924 to make sure the ones that get the double
5925 binary op are chosen wisely. */
5926 int len = ops.length ();
5927 if (len >= 3)
5928 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5930 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5931 powi_result != NULL
5932 || negate_result,
5933 len != orig_len);
5936 /* If we combined some repeated factors into a
5937 __builtin_powi call, multiply that result by the
5938 reassociated operands. */
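/* A hedged illustration (hypothetical names): if the original
   statement computed x_1 * x_1 * x_1 * y_2 * z_3, attempt_builtin_powi
   may have set powi_result to __builtin_powi (x_1, 3) and left
   OPS = { y_2, z_3 }. The code below renames the LHS of the rewritten
   chain to a fresh "reassocpow" temporary and re-multiplies:
   reassocpow_4 = y_2 * z_3;
   lhs = powi_result * reassocpow_4; */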
5939 if (powi_result)
5941 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5942 tree type = TREE_TYPE (lhs);
5943 tree target_ssa = make_temp_ssa_name (type, NULL,
5944 "reassocpow");
5945 gimple_set_lhs (lhs_stmt, target_ssa);
5946 update_stmt (lhs_stmt);
5947 if (lhs != new_lhs)
5949 target_ssa = new_lhs;
5950 new_lhs = lhs;
5952 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5953 powi_result, target_ssa);
5954 gimple_set_location (mul_stmt, gimple_location (stmt));
5955 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5956 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5960 if (negate_result)
5962 stmt = SSA_NAME_DEF_STMT (lhs);
5963 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5964 gimple_set_lhs (stmt, tmp);
5965 if (lhs != new_lhs)
5966 tmp = new_lhs;
5967 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5968 tmp);
5969 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5970 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5971 update_stmt (stmt);
5976 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5977 son;
5978 son = next_dom_son (CDI_POST_DOMINATORS, son))
5979 cfg_cleanup_needed |= reassociate_bb (son);
5981 return cfg_cleanup_needed;
5984 /* Add jumps around shifts for range tests turned into bit tests.
5985 For each SSA_NAME VAR we have code like:
5986 VAR = ...; // final stmt of range comparison
5987 // bit test here...;
5988 OTHERVAR = ...; // final stmt of the bit test sequence
5989 RES = VAR | OTHERVAR;
5990 Turn the above into:
5991 VAR = ...;
5992 if (VAR != 0)
5993 goto <l3>;
5994 else
5995 goto <l2>;
5996 <l2>:
5997 // bit test here...;
5998 OTHERVAR = ...;
5999 <l3>:
6000 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6002 static void
6003 branch_fixup (void)
6005 tree var;
6006 unsigned int i;
6008 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6010 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6011 gimple *use_stmt;
6012 use_operand_p use;
6013 bool ok = single_imm_use (var, &use, &use_stmt);
6014 gcc_assert (ok
6015 && is_gimple_assign (use_stmt)
6016 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6017 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6019 basic_block cond_bb = gimple_bb (def_stmt);
6020 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6021 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6023 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6024 gimple *g = gimple_build_cond (NE_EXPR, var,
6025 build_zero_cst (TREE_TYPE (var)),
6026 NULL_TREE, NULL_TREE);
6027 location_t loc = gimple_location (use_stmt);
6028 gimple_set_location (g, loc);
6029 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6031 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6032 etrue->probability = profile_probability::even ();
6033 etrue->count = cond_bb->count.apply_scale (1, 2);
6034 edge efalse = find_edge (cond_bb, then_bb);
6035 efalse->flags = EDGE_FALSE_VALUE;
6036 efalse->probability -= etrue->probability;
6037 efalse->count -= etrue->count;
6038 then_bb->count -= etrue->count;
6040 tree othervar = NULL_TREE;
6041 if (gimple_assign_rhs1 (use_stmt) == var)
6042 othervar = gimple_assign_rhs2 (use_stmt);
6043 else if (gimple_assign_rhs2 (use_stmt) == var)
6044 othervar = gimple_assign_rhs1 (use_stmt);
6045 else
6046 gcc_unreachable ();
6047 tree lhs = gimple_assign_lhs (use_stmt);
6048 gphi *phi = create_phi_node (lhs, merge_bb);
6049 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6050 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6051 gsi = gsi_for_stmt (use_stmt);
6052 gsi_remove (&gsi, true);
6054 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6055 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6057 reassoc_branch_fixups.release ();
6060 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6061 void debug_ops_vector (vec<operand_entry *> ops);
6063 /* Dump the operand entry vector OPS to FILE. */
6065 void
6066 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6068 operand_entry *oe;
6069 unsigned int i;
6071 FOR_EACH_VEC_ELT (ops, i, oe)
6073 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6074 print_generic_expr (file, oe->op);
6075 fprintf (file, "\n");
6079 /* Dump the operand entry vector OPS to STDERR. */
6081 DEBUG_FUNCTION void
6082 debug_ops_vector (vec<operand_entry *> ops)
6084 dump_ops_vector (stderr, ops);
6087 /* Bubble up return status from reassociate_bb. */
6089 static bool
6090 do_reassoc (void)
6092 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6093 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6096 /* Initialize the reassociation pass. */
6098 static void
6099 init_reassoc (void)
6101 int i;
6102 long rank = 2;
6103 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6105 /* Find the loops, so that we can prevent moving calculations in
6106 them. */
6107 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6109 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6111 next_operand_entry_id = 0;
6113 /* Reverse Post Order (RPO) will give us an order in which
6114 deeper loops come later. */
6115 pre_and_rev_post_order_compute (NULL, bbs, false);
6116 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6117 operand_rank = new hash_map<tree, long>;
6119 /* Give each default definition a distinct rank. This includes
6120 parameters and the static chain. Walk backwards over all
6121 SSA names so that we get proper rank ordering according
6122 to tree_swap_operands_p. */
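/* For example (hypothetical names): with default definitions a_1(D),
   b_2(D) and c_3(D) and rank starting at 2, walking from the highest
   SSA version down assigns c_3(D) rank 3, b_2(D) rank 4 and a_1(D)
   rank 5. */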
6123 for (i = num_ssa_names - 1; i > 0; --i)
6125 tree name = ssa_name (i);
6126 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6127 insert_operand_rank (name, ++rank);
6130 /* Set up rank for each BB.  */
6131 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6132 bb_rank[bbs[i]] = ++rank << 16;
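/* A small worked example of the numbers involved (hypothetical CFG):
   continuing the ranks above, if the last default definition received
   rank 5, the first two blocks in the order get
   bb_rank[bbs[0]] = 6 << 16
   bb_rank[bbs[1]] = 7 << 16
   so blocks later in the order get strictly larger base ranks, with
   the low 16 bits left as headroom for ranks of names defined inside
   a block. */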
6134 free (bbs);
6135 calculate_dominance_info (CDI_POST_DOMINATORS);
6136 plus_negates = vNULL;
6139 /* Clean up after the reassociation pass, and print stats if
6140 requested. */
6142 static void
6143 fini_reassoc (void)
6145 statistics_counter_event (cfun, "Linearized",
6146 reassociate_stats.linearized);
6147 statistics_counter_event (cfun, "Constants eliminated",
6148 reassociate_stats.constants_eliminated);
6149 statistics_counter_event (cfun, "Ops eliminated",
6150 reassociate_stats.ops_eliminated);
6151 statistics_counter_event (cfun, "Statements rewritten",
6152 reassociate_stats.rewritten);
6153 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6154 reassociate_stats.pows_encountered);
6155 statistics_counter_event (cfun, "Built-in powi calls created",
6156 reassociate_stats.pows_created);
6158 delete operand_rank;
6159 operand_entry_pool.release ();
6160 free (bb_rank);
6161 plus_negates.release ();
6162 free_dominance_info (CDI_POST_DOMINATORS);
6163 loop_optimizer_finalize ();
6166 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6167 insertion of __builtin_powi calls.
6169 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
6170 optimization of a gimple conditional. Otherwise returns zero. */
6172 static unsigned int
6173 execute_reassoc (bool insert_powi_p)
6175 reassoc_insert_powi_p = insert_powi_p;
6177 init_reassoc ();
6179 bool cfg_cleanup_needed;
6180 cfg_cleanup_needed = do_reassoc ();
6181 repropagate_negates ();
6182 branch_fixup ();
6184 fini_reassoc ();
6185 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6188 namespace {
6190 const pass_data pass_data_reassoc =
6192 GIMPLE_PASS, /* type */
6193 "reassoc", /* name */
6194 OPTGROUP_NONE, /* optinfo_flags */
6195 TV_TREE_REASSOC, /* tv_id */
6196 ( PROP_cfg | PROP_ssa ), /* properties_required */
6197 0, /* properties_provided */
6198 0, /* properties_destroyed */
6199 0, /* todo_flags_start */
6200 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6203 class pass_reassoc : public gimple_opt_pass
6205 public:
6206 pass_reassoc (gcc::context *ctxt)
6207 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6210 /* opt_pass methods: */
6211 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6212 void set_pass_param (unsigned int n, bool param)
6214 gcc_assert (n == 0);
6215 insert_powi_p = param;
6217 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6218 virtual unsigned int execute (function *)
6219 { return execute_reassoc (insert_powi_p); }
6221 private:
6222 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6223 point 3a in the pass header comment. */
6224 bool insert_powi_p;
6225 }; // class pass_reassoc
6227 } // anon namespace
6229 gimple_opt_pass *
6230 make_pass_reassoc (gcc::context *ctxt)
6232 return new pass_reassoc (ctxt);