gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
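   /* Illustrative sketch (editor's example, not part of GCC): the
      "theoretically nice" worklist rewrite described above, run on a toy
      list of named operands tagged with ranks.  Operands of equal rank are
      merged first and every merge result goes back on the worklist,
      reproducing the a/b/c/d/e walkthrough in the comment.  All names are
      hypothetical; the block is wrapped in #if 0 so it stays out of the
      translation unit.  */
#if 0
#include <stdio.h>

struct toy_op { char name[16]; int rank; };

/* Merge the first two worklist entries of equal rank (or simply the first
   two entries when no ranks match), append the result and shrink the
   worklist.  */
static void
merge_step (struct toy_op *work, int *n, int *tmpno)
{
  int a = 0, b = 1;
  for (int i = 0; i < *n; i++)
    for (int j = i + 1; j < *n; j++)
      if (work[i].rank == work[j].rank)
	{
	  a = i;
	  b = j;
	  goto found;
	}
 found:;
  struct toy_op res;
  snprintf (res.name, sizeof res.name, "mergetmp%d", ++*tmpno);
  res.rank = 1 + (work[a].rank > work[b].rank ? work[a].rank : work[b].rank);
  printf ("%s = %s + %s\n", res.name, work[a].name, work[b].name);

  work[b] = work[--*n];   /* Unordered removal of the two inputs.  */
  work[a] = work[--*n];
  work[(*n)++] = res;     /* Push the merge result.  */
}

int
main (void)
{
  /* a (1), b (1), c (1), d (2), e (2), opcode PLUS_EXPR.  */
  struct toy_op work[16] = {
    { "a", 1 }, { "b", 1 }, { "c", 1 }, { "d", 2 }, { "e", 2 }
  };
  int n = 5, tmpno = 0;

  while (n > 1)
    merge_step (work, &n, &tmpno);

  printf ("final result: %s\n", work[0].name);
  return 0;
}
#endif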
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static long *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, long> *operand_rank;
215 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
216 all basic blocks, the CFG should be adjusted - the basic block
217 split right after that SSA_NAME's definition statement and before
218 the only use, which must be a bit ior. */
219 static vec<tree> reassoc_branch_fixups;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple *stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple *end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple *stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple *use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple *phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 long *slot = operand_rank->get (e);
366 return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 gcc_assert (rank > 0);
375 gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
380 static long
381 get_rank (tree e)
383 /* SSA_NAMEs have the rank of the expression they are the result of.
385 For globals and uninitialized values, the rank is 0.
386 For function arguments, use the pre-setup rank.
387 For PHI nodes, stores, asm statements, etc, we use the rank of
388 the BB.
389 For simple operations, the rank is the maximum rank of any of
390 its operands, or the bb_rank, whichever is less.
391 I make no claims that this is optimal; however, it gives good
392 results. */
394 /* We make an exception to the normal ranking system to break
395 dependences of accumulator variables in loops. Suppose we
396 have a simple one-block loop containing:
398 x_1 = phi(x_0, x_2)
399 b = a + x_1
400 c = b + d
401 x_2 = c + e
403 As shown, each iteration of the calculation into x is fully
404 dependent upon the iteration before it. We would prefer to
405 see this in the form:
407 x_1 = phi(x_0, x_2)
408 b = a + d
409 c = b + e
410 x_2 = c + x_1
412 If the loop is unrolled, the calculations of b and c from
413 different iterations can be interleaved.
415 To obtain this result during reassociation, we bias the rank
416 of the phi definition x_1 upward, when it is recognized as an
417 accumulator pattern. The artificial rank causes it to be
418 added last, providing the desired independence. */
420 if (TREE_CODE (e) == SSA_NAME)
422 ssa_op_iter iter;
423 gimple *stmt;
424 long rank;
425 tree op;
427 if (SSA_NAME_IS_DEFAULT_DEF (e))
428 return find_operand_rank (e);
430 stmt = SSA_NAME_DEF_STMT (e);
431 if (gimple_code (stmt) == GIMPLE_PHI)
432 return phi_rank (stmt);
434 if (!is_gimple_assign (stmt))
435 return bb_rank[gimple_bb (stmt)->index];
437 /* If we already have a rank for this expression, use that. */
438 rank = find_operand_rank (e);
439 if (rank != -1)
440 return rank;
442 /* Otherwise, find the maximum rank for the operands. As an
443 exception, remove the bias from loop-carried phis when propagating
444 the rank so that dependent operations are not also biased. */
445 /* Simply walk over all SSA uses - this takes advantage of the
446 fact that non-SSA operands satisfy is_gimple_min_invariant and
447 thus have rank 0. */
448 rank = 0;
449 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
450 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Constants, globals, etc., are rank 0 */
465 return 0;
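   /* Illustrative sketch (editor's example, not part of GCC): the
      accumulator rewrite motivating the phi bias described above.  Both
      loop bodies below compute the same sums, but in the second the
      loop-carried value enters only in the last addition; a, d and e are
      arbitrary sample values and unsigned arithmetic keeps the association
      order irrelevant.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned int a = 3u, d = 5u, e = 7u;
  unsigned int x_plain = 0u, x_biased = 0u;

  for (int i = 0; i < 100; i++)
    {
      /* Original order: every addition depends on the previous x.  */
      unsigned int b0 = a + x_plain;
      unsigned int c0 = b0 + d;
      x_plain = c0 + e;

      /* Reassociated order: x is added last.  */
      unsigned int b1 = a + d;
      unsigned int c1 = b1 + e;
      x_biased = c1 + x_biased;
    }

  assert (x_plain == x_biased);
  return 0;
}
#endif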
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry *oea = *(const operand_entry *const *)pa;
494 const operand_entry *oeb = *(const operand_entry *const *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied/whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible, prefer to sort
517 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
558 operand_entry *oe = operand_entry_pool.allocate ();
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry *oe = operand_entry_pool.allocate ();
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is a reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Return true if STMT is a nop-conversion. */
610 static bool
611 gimple_nop_conversion_p (gimple *stmt)
613 if (gassign *ass = dyn_cast <gassign *> (stmt))
615 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
616 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
617 TREE_TYPE (gimple_assign_rhs1 (ass))))
618 return true;
620 return false;
623 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
624 operand of that operation. Otherwise, return NULL_TREE. */
626 static tree
627 get_unary_op (tree name, enum tree_code opcode)
629 gimple *stmt = SSA_NAME_DEF_STMT (name);
631 /* Look through nop conversions (sign changes). */
632 if (gimple_nop_conversion_p (stmt)
633 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
634 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
636 if (!is_gimple_assign (stmt))
637 return NULL_TREE;
639 if (gimple_assign_rhs_code (stmt) == opcode)
640 return gimple_assign_rhs1 (stmt);
641 return NULL_TREE;
644 /* Return true if OP1 and OP2 have the same value if cast to either type. */
646 static bool
647 ops_equal_values_p (tree op1, tree op2)
649 if (op1 == op2)
650 return true;
652 tree orig_op1 = op1;
653 if (TREE_CODE (op1) == SSA_NAME)
655 gimple *stmt = SSA_NAME_DEF_STMT (op1);
656 if (gimple_nop_conversion_p (stmt))
658 op1 = gimple_assign_rhs1 (stmt);
659 if (op1 == op2)
660 return true;
664 if (TREE_CODE (op2) == SSA_NAME)
666 gimple *stmt = SSA_NAME_DEF_STMT (op2);
667 if (gimple_nop_conversion_p (stmt))
669 op2 = gimple_assign_rhs1 (stmt);
670 if (op1 == op2
671 || orig_op1 == op2)
672 return true;
676 return false;
680 /* If CURR and LAST are a pair of ops that OPCODE allows us to
681 eliminate through equivalences, do so, remove them from OPS, and
682 return true. Otherwise, return false. */
684 static bool
685 eliminate_duplicate_pair (enum tree_code opcode,
686 vec<operand_entry *> *ops,
687 bool *all_done,
688 unsigned int i,
689 operand_entry *curr,
690 operand_entry *last)
693 /* If we have two of the same op, and the opcode is & |, min, or max,
694 we can eliminate one of them.
695 If we have two of the same op, and the opcode is ^, we can
696 eliminate both of them. */
698 if (last && last->op == curr->op)
700 switch (opcode)
702 case MAX_EXPR:
703 case MIN_EXPR:
704 case BIT_IOR_EXPR:
705 case BIT_AND_EXPR:
706 if (dump_file && (dump_flags & TDF_DETAILS))
708 fprintf (dump_file, "Equivalence: ");
709 print_generic_expr (dump_file, curr->op, 0);
710 fprintf (dump_file, " [&|minmax] ");
711 print_generic_expr (dump_file, last->op, 0);
712 fprintf (dump_file, " -> ");
713 print_generic_stmt (dump_file, last->op, 0);
716 ops->ordered_remove (i);
717 reassociate_stats.ops_eliminated ++;
719 return true;
721 case BIT_XOR_EXPR:
722 if (dump_file && (dump_flags & TDF_DETAILS))
724 fprintf (dump_file, "Equivalence: ");
725 print_generic_expr (dump_file, curr->op, 0);
726 fprintf (dump_file, " ^ ");
727 print_generic_expr (dump_file, last->op, 0);
728 fprintf (dump_file, " -> nothing\n");
731 reassociate_stats.ops_eliminated += 2;
733 if (ops->length () == 2)
735 ops->truncate (0);
736 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
737 *all_done = true;
739 else
741 ops->ordered_remove (i-1);
742 ops->ordered_remove (i-1);
745 return true;
747 default:
748 break;
751 return false;
754 static vec<tree> plus_negates;
756 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
757 expression, look in OPS for a corresponding positive operation to cancel
758 it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
760 return false. */
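   /* Illustrative sketch (editor's example, not part of GCC): the two
      identities this elimination relies on, x + (-x) == 0 and
      x + ~x == -1, checked on a few sample values.  Unsigned arithmetic is
      used so negation is defined for every input; ~0u stands in for the
      all-ones constant built by build_all_ones_cst.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned int samples[] = { 0u, 1u, 7u, 0x80000000u, 0xffffffffu };
  for (unsigned int i = 0; i < sizeof samples / sizeof samples[0]; i++)
    {
      unsigned int x = samples[i];
      assert (x + (0u - x) == 0u);   /* negateop case: ... + -x -> 0.  */
      assert (x + ~x == ~0u);        /* notop case: ... + ~x -> -1.  */
    }
  return 0;
}
#endif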
762 static bool
763 eliminate_plus_minus_pair (enum tree_code opcode,
764 vec<operand_entry *> *ops,
765 unsigned int currindex,
766 operand_entry *curr)
768 tree negateop;
769 tree notop;
770 unsigned int i;
771 operand_entry *oe;
773 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 negateop = get_unary_op (curr->op, NEGATE_EXPR);
777 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
778 if (negateop == NULL_TREE && notop == NULL_TREE)
779 return false;
781 /* Any non-negated version will have a rank that is one less than
782 the current rank. So once we hit those ranks, if we don't find
783 one, we can stop. */
785 for (i = currindex + 1;
786 ops->iterate (i, &oe)
787 && oe->rank >= curr->rank - 1 ;
788 i++)
790 if (negateop
791 && ops_equal_values_p (oe->op, negateop))
793 if (dump_file && (dump_flags & TDF_DETAILS))
795 fprintf (dump_file, "Equivalence: ");
796 print_generic_expr (dump_file, negateop, 0);
797 fprintf (dump_file, " + -");
798 print_generic_expr (dump_file, oe->op, 0);
799 fprintf (dump_file, " -> 0\n");
802 ops->ordered_remove (i);
803 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
804 ops->ordered_remove (currindex);
805 reassociate_stats.ops_eliminated ++;
807 return true;
809 else if (notop
810 && ops_equal_values_p (oe->op, notop))
812 tree op_type = TREE_TYPE (oe->op);
814 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, notop, 0);
818 fprintf (dump_file, " + ~");
819 print_generic_expr (dump_file, oe->op, 0);
820 fprintf (dump_file, " -> -1\n");
823 ops->ordered_remove (i);
824 add_to_ops_vec (ops, build_all_ones_cst (op_type));
825 ops->ordered_remove (currindex);
826 reassociate_stats.ops_eliminated ++;
828 return true;
832 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
833 save it for later inspection in repropagate_negates(). */
834 if (negateop != NULL_TREE
835 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
836 plus_negates.safe_push (curr->op);
838 return false;
841 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
842 bitwise not expression, look in OPS for a corresponding operand to
843 cancel it out. If we find one, remove the other from OPS, replace
844 OPS[CURRINDEX] with 0, and return true. Otherwise, return
845 false. */
847 static bool
848 eliminate_not_pairs (enum tree_code opcode,
849 vec<operand_entry *> *ops,
850 unsigned int currindex,
851 operand_entry *curr)
853 tree notop;
854 unsigned int i;
855 operand_entry *oe;
857 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
858 || TREE_CODE (curr->op) != SSA_NAME)
859 return false;
861 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
862 if (notop == NULL_TREE)
863 return false;
865 /* Any non-not version will have a rank that is one less than
866 the current rank. So once we hit those ranks, if we don't find
867 one, we can stop. */
869 for (i = currindex + 1;
870 ops->iterate (i, &oe)
871 && oe->rank >= curr->rank - 1;
872 i++)
874 if (oe->op == notop)
876 if (dump_file && (dump_flags & TDF_DETAILS))
878 fprintf (dump_file, "Equivalence: ");
879 print_generic_expr (dump_file, notop, 0);
880 if (opcode == BIT_AND_EXPR)
881 fprintf (dump_file, " & ~");
882 else if (opcode == BIT_IOR_EXPR)
883 fprintf (dump_file, " | ~");
884 print_generic_expr (dump_file, oe->op, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " -> 0\n");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " -> -1\n");
891 if (opcode == BIT_AND_EXPR)
892 oe->op = build_zero_cst (TREE_TYPE (oe->op));
893 else if (opcode == BIT_IOR_EXPR)
894 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
896 reassociate_stats.ops_eliminated += ops->length () - 1;
897 ops->truncate (0);
898 ops->quick_push (oe);
899 return true;
903 return false;
906 /* Use constant value that may be present in OPS to try to eliminate
907 operands. Note that this function is only really used when we've
908 eliminated ops for other reasons, or merged constants. Across
909 single statements, fold already does all of this, plus more. There
910 is little point in duplicating logic, so I've only included the
911 identities that I could ever construct testcases to trigger. */
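   /* Illustrative sketch (editor's example, not part of GCC): the constant
      identities exploited below, stated on unsigned ints; the value of a
      is an arbitrary sample.  Each assert pairs an operand list containing
      the constant with the value the list is reduced to.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned int a = 0xdeadbeefu;

  assert ((a & 0u) == 0u);    /* & 0: all other ops removed.  */
  assert ((a & ~0u) == a);    /* & -1: the constant removed.  */
  assert ((a | ~0u) == ~0u);  /* | -1: all other ops removed.  */
  assert ((a | 0u) == a);     /* | 0: the constant removed.  */
  assert (a * 0u == 0u);      /* * 0: all other ops removed.  */
  assert (a * 1u == a);       /* * 1: the constant removed.  */
  assert ((a ^ 0u) == a);     /* [|^+] 0: the constant removed.  */
  return 0;
}
#endif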
913 static void
914 eliminate_using_constants (enum tree_code opcode,
915 vec<operand_entry *> *ops)
917 operand_entry *oelast = ops->last ();
918 tree type = TREE_TYPE (oelast->op);
920 if (oelast->rank == 0
921 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
923 switch (opcode)
925 case BIT_AND_EXPR:
926 if (integer_zerop (oelast->op))
928 if (ops->length () != 1)
930 if (dump_file && (dump_flags & TDF_DETAILS))
931 fprintf (dump_file, "Found & 0, removing all other ops\n");
933 reassociate_stats.ops_eliminated += ops->length () - 1;
935 ops->truncate (0);
936 ops->quick_push (oelast);
937 return;
940 else if (integer_all_onesp (oelast->op))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found & -1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
950 break;
951 case BIT_IOR_EXPR:
952 if (integer_all_onesp (oelast->op))
954 if (ops->length () != 1)
956 if (dump_file && (dump_flags & TDF_DETAILS))
957 fprintf (dump_file, "Found | -1, removing all other ops\n");
959 reassociate_stats.ops_eliminated += ops->length () - 1;
961 ops->truncate (0);
962 ops->quick_push (oelast);
963 return;
966 else if (integer_zerop (oelast->op))
968 if (ops->length () != 1)
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 fprintf (dump_file, "Found | 0, removing\n");
972 ops->pop ();
973 reassociate_stats.ops_eliminated++;
976 break;
977 case MULT_EXPR:
978 if (integer_zerop (oelast->op)
979 || (FLOAT_TYPE_P (type)
980 && !HONOR_NANS (type)
981 && !HONOR_SIGNED_ZEROS (type)
982 && real_zerop (oelast->op)))
984 if (ops->length () != 1)
986 if (dump_file && (dump_flags & TDF_DETAILS))
987 fprintf (dump_file, "Found * 0, removing all other ops\n");
989 reassociate_stats.ops_eliminated += ops->length () - 1;
990 ops->truncate (1);
991 ops->quick_push (oelast);
992 return;
995 else if (integer_onep (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_SNANS (type)
998 && real_onep (oelast->op)))
1000 if (ops->length () != 1)
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1003 fprintf (dump_file, "Found * 1, removing\n");
1004 ops->pop ();
1005 reassociate_stats.ops_eliminated++;
1006 return;
1009 break;
1010 case BIT_XOR_EXPR:
1011 case PLUS_EXPR:
1012 case MINUS_EXPR:
1013 if (integer_zerop (oelast->op)
1014 || (FLOAT_TYPE_P (type)
1015 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1016 && fold_real_zero_addition_p (type, oelast->op,
1017 opcode == MINUS_EXPR)))
1019 if (ops->length () != 1)
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "Found [|^+] 0, removing\n");
1023 ops->pop ();
1024 reassociate_stats.ops_eliminated++;
1025 return;
1028 break;
1029 default:
1030 break;
1036 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1037 bool, bool);
1039 /* Structure for tracking and counting operands. */
1040 struct oecount {
1041 int cnt;
1042 int id;
1043 enum tree_code oecode;
1044 tree op;
1048 /* The heap for the oecount hashtable and the sorted list of operands. */
1049 static vec<oecount> cvec;
1052 /* Oecount hashtable helpers. */
1054 struct oecount_hasher : int_hash <int, 0, 1>
1056 static inline hashval_t hash (int);
1057 static inline bool equal (int, int);
1060 /* Hash function for oecount. */
1062 inline hashval_t
1063 oecount_hasher::hash (int p)
1065 const oecount *c = &cvec[p - 42];
1066 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1069 /* Comparison function for oecount. */
1071 inline bool
1072 oecount_hasher::equal (int p1, int p2)
1074 const oecount *c1 = &cvec[p1 - 42];
1075 const oecount *c2 = &cvec[p2 - 42];
1076 return (c1->oecode == c2->oecode
1077 && c1->op == c2->op);
1080 /* Comparison function for qsort sorting oecount elements by count. */
1082 static int
1083 oecount_cmp (const void *p1, const void *p2)
1085 const oecount *c1 = (const oecount *)p1;
1086 const oecount *c2 = (const oecount *)p2;
1087 if (c1->cnt != c2->cnt)
1088 return c1->cnt - c2->cnt;
1089 else
1090 /* If counts are identical, use unique IDs to stabilize qsort. */
1091 return c1->id - c2->id;
1094 /* Return TRUE iff STMT represents a builtin call that raises OP
1095 to some exponent. */
1097 static bool
1098 stmt_is_power_of_op (gimple *stmt, tree op)
1100 if (!is_gimple_call (stmt))
1101 return false;
1103 switch (gimple_call_combined_fn (stmt))
1105 CASE_CFN_POW:
1106 CASE_CFN_POWI:
1107 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1109 default:
1110 return false;
1114 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1115 in place and return the result. Assumes that stmt_is_power_of_op
1116 was previously called for STMT and returned TRUE. */
1118 static HOST_WIDE_INT
1119 decrement_power (gimple *stmt)
1121 REAL_VALUE_TYPE c, cint;
1122 HOST_WIDE_INT power;
1123 tree arg1;
1125 switch (gimple_call_combined_fn (stmt))
1127 CASE_CFN_POW:
1128 arg1 = gimple_call_arg (stmt, 1);
1129 c = TREE_REAL_CST (arg1);
1130 power = real_to_integer (&c) - 1;
1131 real_from_integer (&cint, VOIDmode, power, SIGNED);
1132 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1133 return power;
1135 CASE_CFN_POWI:
1136 arg1 = gimple_call_arg (stmt, 1);
1137 power = TREE_INT_CST_LOW (arg1) - 1;
1138 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1139 return power;
1141 default:
1142 gcc_unreachable ();
1146 /* Find the single immediate use of STMT's LHS, and replace it
1147 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1148 replace *DEF with OP as well. */
1150 static void
1151 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1153 tree lhs;
1154 gimple *use_stmt;
1155 use_operand_p use;
1156 gimple_stmt_iterator gsi;
1158 if (is_gimple_call (stmt))
1159 lhs = gimple_call_lhs (stmt);
1160 else
1161 lhs = gimple_assign_lhs (stmt);
1163 gcc_assert (has_single_use (lhs));
1164 single_imm_use (lhs, &use, &use_stmt);
1165 if (lhs == *def)
1166 *def = op;
1167 SET_USE (use, op);
1168 if (TREE_CODE (op) != SSA_NAME)
1169 update_stmt (use_stmt);
1170 gsi = gsi_for_stmt (stmt);
1171 unlink_stmt_vdef (stmt);
1172 reassoc_remove_stmt (&gsi);
1173 release_defs (stmt);
1176 /* Walks the linear chain with result *DEF searching for an operation
1177 with operand OP and code OPCODE, removing it from the chain. *DEF
1178 is updated if there is only one operand but no operation left. */
1180 static void
1181 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1183 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1187 tree name;
1189 if (opcode == MULT_EXPR
1190 && stmt_is_power_of_op (stmt, op))
1192 if (decrement_power (stmt) == 1)
1193 propagate_op_to_single_use (op, stmt, def);
1194 return;
1197 name = gimple_assign_rhs1 (stmt);
1199 /* If this is the operation we look for and one of the operands
1200 is ours simply propagate the other operand into the stmts
1201 single use. */
1202 if (gimple_assign_rhs_code (stmt) == opcode
1203 && (name == op
1204 || gimple_assign_rhs2 (stmt) == op))
1206 if (name == op)
1207 name = gimple_assign_rhs2 (stmt);
1208 propagate_op_to_single_use (name, stmt, def);
1209 return;
1212 /* We might have a multiply of two __builtin_pow* calls, and
1213 the operand might be hiding in the rightmost one. */
1214 if (opcode == MULT_EXPR
1215 && gimple_assign_rhs_code (stmt) == opcode
1216 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1217 && has_single_use (gimple_assign_rhs2 (stmt)))
1219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1220 if (stmt_is_power_of_op (stmt2, op))
1222 if (decrement_power (stmt2) == 1)
1223 propagate_op_to_single_use (op, stmt2, def);
1224 return;
1228 /* Continue walking the chain. */
1229 gcc_assert (name != op
1230 && TREE_CODE (name) == SSA_NAME);
1231 stmt = SSA_NAME_DEF_STMT (name);
1233 while (1);
1236 /* Returns true if statement S1 dominates statement S2. Like
1237 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1239 static bool
1240 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1242 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1244 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a default
1245 definition (D) SSA_NAME. Assume it lives at the beginning of the
1246 function and thus dominates everything. */
1247 if (!bb1 || s1 == s2)
1248 return true;
1250 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1251 if (!bb2)
1252 return false;
1254 if (bb1 == bb2)
1256 /* PHIs in the same basic block are assumed to be
1257 executed all in parallel; if only one stmt is a PHI,
1258 it dominates the other stmt in the same basic block. */
1259 if (gimple_code (s1) == GIMPLE_PHI)
1260 return true;
1262 if (gimple_code (s2) == GIMPLE_PHI)
1263 return false;
1265 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1267 if (gimple_uid (s1) < gimple_uid (s2))
1268 return true;
1270 if (gimple_uid (s1) > gimple_uid (s2))
1271 return false;
1273 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1274 unsigned int uid = gimple_uid (s1);
1275 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1277 gimple *s = gsi_stmt (gsi);
1278 if (gimple_uid (s) != uid)
1279 break;
1280 if (s == s2)
1281 return true;
1284 return false;
1287 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1290 /* Insert STMT after INSERT_POINT. */
1292 static void
1293 insert_stmt_after (gimple *stmt, gimple *insert_point)
1295 gimple_stmt_iterator gsi;
1296 basic_block bb;
1298 if (gimple_code (insert_point) == GIMPLE_PHI)
1299 bb = gimple_bb (insert_point);
1300 else if (!stmt_ends_bb_p (insert_point))
1302 gsi = gsi_for_stmt (insert_point);
1303 gimple_set_uid (stmt, gimple_uid (insert_point));
1304 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1305 return;
1307 else
1308 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1309 thus if it must end a basic block, it should be a call that can
1310 throw, or some assignment that can throw. If it throws, the LHS
1311 of it will not be initialized though, so only valid places using
1312 the SSA_NAME should be dominated by the fallthru edge. */
1313 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1314 gsi = gsi_after_labels (bb);
1315 if (gsi_end_p (gsi))
1317 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1318 gimple_set_uid (stmt,
1319 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1321 else
1322 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1323 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1326 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1327 name of TYPE for the result. Places the statement after the definition of either
1328 OP1 or OP2. Returns the new statement. */
1330 static gimple *
1331 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1333 gimple *op1def = NULL, *op2def = NULL;
1334 gimple_stmt_iterator gsi;
1335 tree op;
1336 gassign *sum;
1338 /* Create the addition statement. */
1339 op = make_ssa_name (type);
1340 sum = gimple_build_assign (op, opcode, op1, op2);
1342 /* Find an insertion place and insert. */
1343 if (TREE_CODE (op1) == SSA_NAME)
1344 op1def = SSA_NAME_DEF_STMT (op1);
1345 if (TREE_CODE (op2) == SSA_NAME)
1346 op2def = SSA_NAME_DEF_STMT (op2);
1347 if ((!op1def || gimple_nop_p (op1def))
1348 && (!op2def || gimple_nop_p (op2def)))
1350 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1351 if (gsi_end_p (gsi))
1353 gimple_stmt_iterator gsi2
1354 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355 gimple_set_uid (sum,
1356 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1358 else
1359 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1360 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1362 else
1364 gimple *insert_point;
1365 if ((!op1def || gimple_nop_p (op1def))
1366 || (op2def && !gimple_nop_p (op2def)
1367 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1368 insert_point = op2def;
1369 else
1370 insert_point = op1def;
1371 insert_stmt_after (sum, insert_point);
1373 update_stmt (sum);
1375 return sum;
1378 /* Perform un-distribution of divisions and multiplications.
1379 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1380 to (A + B) / X for real X.
1382 The algorithm is organized as follows.
1384 - First we walk the addition chain *OPS looking for summands that
1385 are defined by a multiplication or a real division. This results
1386 in the candidates bitmap with relevant indices into *OPS.
1388 - Second we build the chains of multiplications or divisions for
1389 these candidates, counting the number of occurrences of (operand, code)
1390 pairs in all of the candidates chains.
1392 - Third we sort the (operand, code) pairs by number of occurrence and
1393 process them starting with the pair with the most uses.
1395 * For each such pair we walk the candidates again to build a
1396 second candidate bitmap noting all multiplication/division chains
1397 that have at least one occurrence of (operand, code).
1399 * We build an alternate addition chain only covering these
1400 candidates with one (operand, code) operation removed from their
1401 multiplication/division chain.
1403 * The first candidate gets replaced by the alternate addition chain
1404 multiplied/divided by the operand.
1406 * All candidate chains get disabled for further processing and
1407 processing of (operand, code) pairs continues.
1409 The alternate addition chains built are re-processed by the main
1410 reassociation algorithm which allows optimizing a * x * y + b * y * x
1411 to (a + b) * x * y in one invocation of the reassociation pass. */
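   /* Illustrative sketch (editor's example, not part of GCC): the
      arithmetic identity a single un-distribution step relies on,
      a * x * y + b * y * x == (a + b) * x * y, checked exhaustively for
      small unsigned operands.  The pass itself performs this rewrite on
      GIMPLE chains via the occurrence counting described above.  */
#if 0
#include <assert.h>

int
main (void)
{
  for (unsigned int a = 0; a < 8; a++)
    for (unsigned int b = 0; b < 8; b++)
      for (unsigned int x = 0; x < 8; x++)
	for (unsigned int y = 0; y < 8; y++)
	  assert (a * x * y + b * y * x == (a + b) * x * y);
  return 0;
}
#endif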
1413 static bool
1414 undistribute_ops_list (enum tree_code opcode,
1415 vec<operand_entry *> *ops, struct loop *loop)
1417 unsigned int length = ops->length ();
1418 operand_entry *oe1;
1419 unsigned i, j;
1420 sbitmap candidates, candidates2;
1421 unsigned nr_candidates, nr_candidates2;
1422 sbitmap_iterator sbi0;
1423 vec<operand_entry *> *subops;
1424 bool changed = false;
1425 int next_oecount_id = 0;
1427 if (length <= 1
1428 || opcode != PLUS_EXPR)
1429 return false;
1431 /* Build a list of candidates to process. */
1432 candidates = sbitmap_alloc (length);
1433 bitmap_clear (candidates);
1434 nr_candidates = 0;
1435 FOR_EACH_VEC_ELT (*ops, i, oe1)
1437 enum tree_code dcode;
1438 gimple *oe1def;
1440 if (TREE_CODE (oe1->op) != SSA_NAME)
1441 continue;
1442 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1443 if (!is_gimple_assign (oe1def))
1444 continue;
1445 dcode = gimple_assign_rhs_code (oe1def);
1446 if ((dcode != MULT_EXPR
1447 && dcode != RDIV_EXPR)
1448 || !is_reassociable_op (oe1def, dcode, loop))
1449 continue;
1451 bitmap_set_bit (candidates, i);
1452 nr_candidates++;
1455 if (nr_candidates < 2)
1457 sbitmap_free (candidates);
1458 return false;
1461 if (dump_file && (dump_flags & TDF_DETAILS))
1463 fprintf (dump_file, "searching for un-distribute opportunities ");
1464 print_generic_expr (dump_file,
1465 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1466 fprintf (dump_file, " %d\n", nr_candidates);
1469 /* Build linearized sub-operand lists and the counting table. */
1470 cvec.create (0);
1472 hash_table<oecount_hasher> ctable (15);
1474 /* ??? Macro arguments cannot have multi-argument template types in
1475 them. This typedef is needed to work around that limitation. */
1476 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1477 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1478 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1480 gimple *oedef;
1481 enum tree_code oecode;
1482 unsigned j;
1484 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1485 oecode = gimple_assign_rhs_code (oedef);
1486 linearize_expr_tree (&subops[i], oedef,
1487 associative_tree_code (oecode), false);
1489 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1491 oecount c;
1492 int *slot;
1493 int idx;
1494 c.oecode = oecode;
1495 c.cnt = 1;
1496 c.id = next_oecount_id++;
1497 c.op = oe1->op;
1498 cvec.safe_push (c);
1499 idx = cvec.length () + 41;
1500 slot = ctable.find_slot (idx, INSERT);
1501 if (!*slot)
1503 *slot = idx;
1505 else
1507 cvec.pop ();
1508 cvec[*slot - 42].cnt++;
1513 /* Sort the counting table. */
1514 cvec.qsort (oecount_cmp);
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 oecount *c;
1519 fprintf (dump_file, "Candidates:\n");
1520 FOR_EACH_VEC_ELT (cvec, j, c)
1522 fprintf (dump_file, " %u %s: ", c->cnt,
1523 c->oecode == MULT_EXPR
1524 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1525 print_generic_expr (dump_file, c->op, 0);
1526 fprintf (dump_file, "\n");
1530 /* Process the (operand, code) pairs in order of most occurrence. */
1531 candidates2 = sbitmap_alloc (length);
1532 while (!cvec.is_empty ())
1534 oecount *c = &cvec.last ();
1535 if (c->cnt < 2)
1536 break;
1538 /* Now collect the operands in the outer chain that contain
1539 the common operand in their inner chain. */
1540 bitmap_clear (candidates2);
1541 nr_candidates2 = 0;
1542 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1544 gimple *oedef;
1545 enum tree_code oecode;
1546 unsigned j;
1547 tree op = (*ops)[i]->op;
1549 /* If we undistributed in this chain already, this may be
1550 a constant. */
1551 if (TREE_CODE (op) != SSA_NAME)
1552 continue;
1554 oedef = SSA_NAME_DEF_STMT (op);
1555 oecode = gimple_assign_rhs_code (oedef);
1556 if (oecode != c->oecode)
1557 continue;
1559 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1561 if (oe1->op == c->op)
1563 bitmap_set_bit (candidates2, i);
1564 ++nr_candidates2;
1565 break;
1570 if (nr_candidates2 >= 2)
1572 operand_entry *oe1, *oe2;
1573 gimple *prod;
1574 int first = bitmap_first_set_bit (candidates2);
1576 /* Build the new addition chain. */
1577 oe1 = (*ops)[first];
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1580 fprintf (dump_file, "Building (");
1581 print_generic_expr (dump_file, oe1->op, 0);
1583 zero_one_operation (&oe1->op, c->oecode, c->op);
1584 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1586 gimple *sum;
1587 oe2 = (*ops)[i];
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, " + ");
1591 print_generic_expr (dump_file, oe2->op, 0);
1593 zero_one_operation (&oe2->op, c->oecode, c->op);
1594 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1595 oe1->op, oe2->op, opcode);
1596 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1597 oe2->rank = 0;
1598 oe1->op = gimple_get_lhs (sum);
1601 /* Apply the multiplication/division. */
1602 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1603 oe1->op, c->op, c->oecode);
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1607 print_generic_expr (dump_file, c->op, 0);
1608 fprintf (dump_file, "\n");
1611 /* Record it in the addition chain and disable further
1612 undistribution with this op. */
1613 oe1->op = gimple_assign_lhs (prod);
1614 oe1->rank = get_rank (oe1->op);
1615 subops[first].release ();
1617 changed = true;
1620 cvec.pop ();
1623 for (i = 0; i < ops->length (); ++i)
1624 subops[i].release ();
1625 free (subops);
1626 cvec.release ();
1627 sbitmap_free (candidates);
1628 sbitmap_free (candidates2);
1630 return changed;
1633 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1634 expression, examine the other OPS to see if any of them are comparisons
1635 of the same values, which we may be able to combine or eliminate.
1636 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
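   /* Illustrative sketch (editor's example, not part of GCC): the kind of
      equivalence this elimination finds via maybe_fold_or_comparisons,
      (a < b) | (a == b) folding to (a <= b), verified over a small range
      of values.  */
#if 0
#include <assert.h>

int
main (void)
{
  for (int a = -4; a <= 4; a++)
    for (int b = -4; b <= 4; b++)
      assert (((a < b) | (a == b)) == (a <= b));
  return 0;
}
#endif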
1638 static bool
1639 eliminate_redundant_comparison (enum tree_code opcode,
1640 vec<operand_entry *> *ops,
1641 unsigned int currindex,
1642 operand_entry *curr)
1644 tree op1, op2;
1645 enum tree_code lcode, rcode;
1646 gimple *def1, *def2;
1647 int i;
1648 operand_entry *oe;
1650 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1651 return false;
1653 /* Check that CURR is a comparison. */
1654 if (TREE_CODE (curr->op) != SSA_NAME)
1655 return false;
1656 def1 = SSA_NAME_DEF_STMT (curr->op);
1657 if (!is_gimple_assign (def1))
1658 return false;
1659 lcode = gimple_assign_rhs_code (def1);
1660 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1661 return false;
1662 op1 = gimple_assign_rhs1 (def1);
1663 op2 = gimple_assign_rhs2 (def1);
1665 /* Now look for a similar comparison in the remaining OPS. */
1666 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1668 tree t;
1670 if (TREE_CODE (oe->op) != SSA_NAME)
1671 continue;
1672 def2 = SSA_NAME_DEF_STMT (oe->op);
1673 if (!is_gimple_assign (def2))
1674 continue;
1675 rcode = gimple_assign_rhs_code (def2);
1676 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1677 continue;
1679 /* If we got here, we have a match. See if we can combine the
1680 two comparisons. */
1681 if (opcode == BIT_IOR_EXPR)
1682 t = maybe_fold_or_comparisons (lcode, op1, op2,
1683 rcode, gimple_assign_rhs1 (def2),
1684 gimple_assign_rhs2 (def2));
1685 else
1686 t = maybe_fold_and_comparisons (lcode, op1, op2,
1687 rcode, gimple_assign_rhs1 (def2),
1688 gimple_assign_rhs2 (def2));
1689 if (!t)
1690 continue;
1692 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1693 always give us a boolean_type_node value back. If the original
1694 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1695 we need to convert. */
1696 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1697 t = fold_convert (TREE_TYPE (curr->op), t);
1699 if (TREE_CODE (t) != INTEGER_CST
1700 && !operand_equal_p (t, curr->op, 0))
1702 enum tree_code subcode;
1703 tree newop1, newop2;
1704 if (!COMPARISON_CLASS_P (t))
1705 continue;
1706 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707 STRIP_USELESS_TYPE_CONVERSION (newop1);
1708 STRIP_USELESS_TYPE_CONVERSION (newop2);
1709 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1710 continue;
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1715 fprintf (dump_file, "Equivalence: ");
1716 print_generic_expr (dump_file, curr->op, 0);
1717 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1718 print_generic_expr (dump_file, oe->op, 0);
1719 fprintf (dump_file, " -> ");
1720 print_generic_expr (dump_file, t, 0);
1721 fprintf (dump_file, "\n");
1724 /* Now we can delete oe, as it has been subsumed by the new combined
1725 expression t. */
1726 ops->ordered_remove (i);
1727 reassociate_stats.ops_eliminated ++;
1729 /* If t is the same as curr->op, we're done. Otherwise we must
1730 replace curr->op with t. A special case is if we got a constant
1731 back, in which case we add it to the end instead of in place of
1732 the current entry. */
1733 if (TREE_CODE (t) == INTEGER_CST)
1735 ops->ordered_remove (currindex);
1736 add_to_ops_vec (ops, t);
1738 else if (!operand_equal_p (t, curr->op, 0))
1740 gimple *sum;
1741 enum tree_code subcode;
1742 tree newop1;
1743 tree newop2;
1744 gcc_assert (COMPARISON_CLASS_P (t));
1745 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746 STRIP_USELESS_TYPE_CONVERSION (newop1);
1747 STRIP_USELESS_TYPE_CONVERSION (newop2);
1748 gcc_checking_assert (is_gimple_val (newop1)
1749 && is_gimple_val (newop2));
1750 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1751 curr->op = gimple_get_lhs (sum);
1753 return true;
1756 return false;
1759 /* Transform repeated addition of the same value into a multiplication
1760 by a constant. */
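   /* Illustrative sketch (editor's example, not part of GCC): the scalar
      identity behind the transform, with a run of three equal summands in
      a sample operand list collapsed into one multiplication; the values
      are arbitrary.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned int ops[] = { 7u, 3u, 3u, 3u, 11u };
  unsigned int sum = 0u;

  for (int i = 0; i < 5; i++)
    sum += ops[i];

  /* The repeated run ops[1..3] becomes a single 3 * ops[1].  */
  unsigned int folded = ops[0] + 3u * ops[1] + ops[4];

  assert (sum == folded);
  return 0;
}
#endif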
1761 static bool
1762 transform_add_to_multiply (gimple *stmt, vec<operand_entry *> *ops)
1764 operand_entry *oe;
1765 tree op = NULL_TREE;
1766 int j;
1767 int i, start = -1, end = 0, count = 0;
1768 vec<std::pair <int, int> > indxs = vNULL;
1769 bool changed = false;
1771 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1772 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1773 || !flag_unsafe_math_optimizations))
1774 return false;
1776 /* Look for repeated operands. */
1777 FOR_EACH_VEC_ELT (*ops, i, oe)
1779 if (start == -1)
1781 count = 1;
1782 op = oe->op;
1783 start = i;
1785 else if (operand_equal_p (oe->op, op, 0))
1787 count++;
1788 end = i;
1790 else
1792 if (count > 1)
1793 indxs.safe_push (std::make_pair (start, end));
1794 count = 1;
1795 op = oe->op;
1796 start = i;
1800 if (count > 1)
1801 indxs.safe_push (std::make_pair (start, end));
1803 for (j = indxs.length () - 1; j >= 0; --j)
1805 /* Convert repeated operand addition to multiplication. */
1806 start = indxs[j].first;
1807 end = indxs[j].second;
1808 op = (*ops)[start]->op;
1809 count = end - start + 1;
1810 for (i = end; i >= start; --i)
1811 ops->unordered_remove (i);
1812 tree tmp = make_ssa_name (TREE_TYPE (op));
1813 tree cst = build_int_cst (integer_type_node, count);
1814 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1815 gassign *mul_stmt
1816 = gimple_build_assign (tmp, MULT_EXPR,
1817 op, fold_convert (TREE_TYPE (op), cst));
1818 if (gimple_code (def_stmt) == GIMPLE_NOP
1819 || gimple_bb (stmt) != gimple_bb (def_stmt))
1821 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1822 gimple_set_uid (mul_stmt, gimple_uid (stmt));
1823 gsi_insert_before (&gsi, mul_stmt, GSI_NEW_STMT);
1825 else
1826 insert_stmt_after (mul_stmt, def_stmt);
1827 gimple_set_visited (mul_stmt, true);
1828 add_to_ops_vec (ops, tmp);
1829 changed = true;
1832 return changed;
1836 /* Perform various identities and other optimizations on the list of
1837 operand entries, stored in OPS. The tree code for the binary
1838 operation between all the operands is OPCODE. */
1840 static void
1841 optimize_ops_list (enum tree_code opcode,
1842 vec<operand_entry *> *ops)
1844 unsigned int length = ops->length ();
1845 unsigned int i;
1846 operand_entry *oe;
1847 operand_entry *oelast = NULL;
1848 bool iterate = false;
1850 if (length == 1)
1851 return;
1853 oelast = ops->last ();
1855 /* If the last two are constants, pop the constants off, merge them
1856 and try the next two. */
1857 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1859 operand_entry *oelm1 = (*ops)[length - 2];
1861 if (oelm1->rank == 0
1862 && is_gimple_min_invariant (oelm1->op)
1863 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1864 TREE_TYPE (oelast->op)))
1866 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1867 oelm1->op, oelast->op);
1869 if (folded && is_gimple_min_invariant (folded))
1871 if (dump_file && (dump_flags & TDF_DETAILS))
1872 fprintf (dump_file, "Merging constants\n");
1874 ops->pop ();
1875 ops->pop ();
1877 add_to_ops_vec (ops, folded);
1878 reassociate_stats.constants_eliminated++;
1880 optimize_ops_list (opcode, ops);
1881 return;
1886 eliminate_using_constants (opcode, ops);
1887 oelast = NULL;
1889 for (i = 0; ops->iterate (i, &oe);)
1891 bool done = false;
1893 if (eliminate_not_pairs (opcode, ops, i, oe))
1894 return;
1895 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1896 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1897 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1899 if (done)
1900 return;
1901 iterate = true;
1902 oelast = NULL;
1903 continue;
1905 oelast = oe;
1906 i++;
1909 length = ops->length ();
1910 oelast = ops->last ();
1912 if (iterate)
1913 optimize_ops_list (opcode, ops);
1916 /* The following functions are subroutines to optimize_range_tests and allow
1917 it to try to change a logical combination of comparisons into a range
1918 test.
1920 For example, both
1921 X == 2 || X == 5 || X == 3 || X == 4
1922 and
1923 X >= 2 && X <= 5
1924 are converted to
1925 (unsigned) (X - 2) <= 3
1927 For more information see comments above fold_test_range in fold-const.c,
1928 this implementation is for GIMPLE. */
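   /* Illustrative sketch (editor's example, not part of GCC): the
      range-test equivalence quoted above, checked for a window of values
      around the range.  The subtraction is done in unsigned arithmetic,
      so values below the low bound wrap around and fail the <= test.  */
#if 0
#include <assert.h>

int
main (void)
{
  for (int x = -10; x <= 10; x++)
    {
      int ors   = (x == 2 || x == 5 || x == 3 || x == 4);
      int ands  = (x >= 2 && x <= 5);
      int range = ((unsigned int) (x - 2) <= 3u);
      assert (ors == ands && ands == range);
    }
  return 0;
}
#endif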
1930 struct range_entry
1932 tree exp;
1933 tree low;
1934 tree high;
1935 bool in_p;
1936 bool strict_overflow_p;
1937 unsigned int idx, next;
1940 /* This is similar to make_range in fold-const.c, but on top of
1941 GIMPLE instead of trees. If EXP is non-NULL, it should be
1942 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1943 argument should be a GIMPLE_COND. */
1945 static void
1946 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1948 int in_p;
1949 tree low, high;
1950 bool is_bool, strict_overflow_p;
1952 r->exp = NULL_TREE;
1953 r->in_p = false;
1954 r->strict_overflow_p = false;
1955 r->low = NULL_TREE;
1956 r->high = NULL_TREE;
1957 if (exp != NULL_TREE
1958 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1959 return;
1961 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1962 and see if we can refine the range. Some of the cases below may not
1963 happen, but it doesn't seem worth worrying about this. We "continue"
1964 the outer loop when we've changed something; otherwise we "break"
1965 the switch, which will "break" the while. */
1966 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1967 high = low;
1968 in_p = 0;
1969 strict_overflow_p = false;
1970 is_bool = false;
1971 if (exp == NULL_TREE)
1972 is_bool = true;
1973 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1975 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1976 is_bool = true;
1977 else
1978 return;
1980 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1981 is_bool = true;
1983 while (1)
1985 enum tree_code code;
1986 tree arg0, arg1, exp_type;
1987 tree nexp;
1988 location_t loc;
1990 if (exp != NULL_TREE)
1992 if (TREE_CODE (exp) != SSA_NAME
1993 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1994 break;
1996 stmt = SSA_NAME_DEF_STMT (exp);
1997 if (!is_gimple_assign (stmt))
1998 break;
2000 code = gimple_assign_rhs_code (stmt);
2001 arg0 = gimple_assign_rhs1 (stmt);
2002 arg1 = gimple_assign_rhs2 (stmt);
2003 exp_type = TREE_TYPE (exp);
2005 else
2007 code = gimple_cond_code (stmt);
2008 arg0 = gimple_cond_lhs (stmt);
2009 arg1 = gimple_cond_rhs (stmt);
2010 exp_type = boolean_type_node;
2013 if (TREE_CODE (arg0) != SSA_NAME)
2014 break;
2015 loc = gimple_location (stmt);
2016 switch (code)
2018 case BIT_NOT_EXPR:
2019 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2020 /* Ensure the range is either +[-,0], +[0,0],
2021 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2022 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2023 or similar expression of unconditional true or
2024 false, it should not be negated. */
2025 && ((high && integer_zerop (high))
2026 || (low && integer_onep (low))))
2028 in_p = !in_p;
2029 exp = arg0;
2030 continue;
2032 break;
2033 case SSA_NAME:
2034 exp = arg0;
2035 continue;
2036 CASE_CONVERT:
2037 if (is_bool)
2038 goto do_default;
2039 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2041 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2042 is_bool = true;
2043 else
2044 return;
2046 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2047 is_bool = true;
2048 goto do_default;
2049 case EQ_EXPR:
2050 case NE_EXPR:
2051 case LT_EXPR:
2052 case LE_EXPR:
2053 case GE_EXPR:
2054 case GT_EXPR:
2055 is_bool = true;
2056 /* FALLTHRU */
2057 default:
2058 if (!is_bool)
2059 return;
2060 do_default:
2061 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2062 &low, &high, &in_p,
2063 &strict_overflow_p);
2064 if (nexp != NULL_TREE)
2066 exp = nexp;
2067 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2068 continue;
2070 break;
2072 break;
2074 if (is_bool)
2076 r->exp = exp;
2077 r->in_p = in_p;
2078 r->low = low;
2079 r->high = high;
2080 r->strict_overflow_p = strict_overflow_p;
2084 /* Comparison function for qsort. Sort entries
2085 without SSA_NAME exp first, then with SSA_NAMEs sorted
2086 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2087 by increasing ->low and if ->low is the same, by increasing
2088 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2089 maximum. */
2091 static int
2092 range_entry_cmp (const void *a, const void *b)
2094 const struct range_entry *p = (const struct range_entry *) a;
2095 const struct range_entry *q = (const struct range_entry *) b;
2097 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2099 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2101 /* Group range_entries for the same SSA_NAME together. */
2102 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2103 return -1;
2104 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2105 return 1;
2106 /* If ->low is different, NULL low goes first, then by
2107 ascending low. */
2108 if (p->low != NULL_TREE)
2110 if (q->low != NULL_TREE)
2112 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2113 p->low, q->low);
2114 if (tem && integer_onep (tem))
2115 return -1;
2116 tem = fold_binary (GT_EXPR, boolean_type_node,
2117 p->low, q->low);
2118 if (tem && integer_onep (tem))
2119 return 1;
2121 else
2122 return 1;
2124 else if (q->low != NULL_TREE)
2125 return -1;
2126 /* If ->high is different, NULL high goes last, before that by
2127 ascending high. */
2128 if (p->high != NULL_TREE)
2130 if (q->high != NULL_TREE)
2132 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2133 p->high, q->high);
2134 if (tem && integer_onep (tem))
2135 return -1;
2136 tem = fold_binary (GT_EXPR, boolean_type_node,
2137 p->high, q->high);
2138 if (tem && integer_onep (tem))
2139 return 1;
2141 else
2142 return -1;
2144 else if (q->high != NULL_TREE)
2145 return 1;
2146 /* If both ranges are the same, sort below by ascending idx. */
2148 else
2149 return 1;
2151 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2152 return -1;
2154 if (p->idx < q->idx)
2155 return -1;
2156 else
2158 gcc_checking_assert (p->idx > q->idx);
2159 return 1;
2163 /* Helper routine of optimize_range_tests.
2164 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2165 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2166 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2167 is NULL, OTHERRANGEP must not be NULL; it then points to
2168 an array of COUNT pointers to other ranges. Return
2169 true if the range merge has been successful.
2170 If OPCODE is ERROR_MARK, this is called from within
2171 maybe_optimize_range_tests and is performing inter-bb range optimization.
2172 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2173 oe->rank. */
2175 static bool
2176 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2177 struct range_entry **otherrangep,
2178 unsigned int count, enum tree_code opcode,
2179 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2180 bool in_p, tree low, tree high, bool strict_overflow_p)
2182 operand_entry *oe = (*ops)[range->idx];
2183 tree op = oe->op;
2184 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2185 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2186 location_t loc = gimple_location (stmt);
2187 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2188 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2189 in_p, low, high);
2190 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2191 gimple_stmt_iterator gsi;
2192 unsigned int i, uid;
2194 if (tem == NULL_TREE)
2195 return false;
2197 /* If op is default def SSA_NAME, there is no place to insert the
2198 new comparison. Give up, unless we can use OP itself as the
2199 range test. */
2200 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2202 if (op == range->exp
2203 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2204 || TREE_CODE (optype) == BOOLEAN_TYPE)
2205 && (op == tem
2206 || (TREE_CODE (tem) == EQ_EXPR
2207 && TREE_OPERAND (tem, 0) == op
2208 && integer_onep (TREE_OPERAND (tem, 1))))
2209 && opcode != BIT_IOR_EXPR
2210 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2212 stmt = NULL;
2213 tem = op;
2215 else
2216 return false;
2219 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2220 warning_at (loc, OPT_Wstrict_overflow,
2221 "assuming signed overflow does not occur "
2222 "when simplifying range test");
2224 if (dump_file && (dump_flags & TDF_DETAILS))
2226 struct range_entry *r;
2227 fprintf (dump_file, "Optimizing range tests ");
2228 print_generic_expr (dump_file, range->exp, 0);
2229 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2230 print_generic_expr (dump_file, range->low, 0);
2231 fprintf (dump_file, ", ");
2232 print_generic_expr (dump_file, range->high, 0);
2233 fprintf (dump_file, "]");
2234 for (i = 0; i < count; i++)
2236 if (otherrange)
2237 r = otherrange + i;
2238 else
2239 r = otherrangep[i];
2240 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2241 print_generic_expr (dump_file, r->low, 0);
2242 fprintf (dump_file, ", ");
2243 print_generic_expr (dump_file, r->high, 0);
2244 fprintf (dump_file, "]");
2246 fprintf (dump_file, "\n into ");
2247 print_generic_expr (dump_file, tem, 0);
2248 fprintf (dump_file, "\n");
2251 if (opcode == BIT_IOR_EXPR
2252 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2253 tem = invert_truthvalue_loc (loc, tem);
2255 tem = fold_convert_loc (loc, optype, tem);
2256 if (stmt)
2258 gsi = gsi_for_stmt (stmt);
2259 uid = gimple_uid (stmt);
2261 else
2263 gsi = gsi_none ();
2264 uid = 0;
2266 if (stmt == NULL)
2267 gcc_checking_assert (tem == op);
2268 /* In rare cases range->exp can be equal to lhs of stmt.
2269 In that case we have to insert after the stmt rather than before
2270 it. If stmt is a PHI, insert it at the start of the basic block. */
2271 else if (op != range->exp)
2273 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2274 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2275 GSI_SAME_STMT);
2276 gsi_prev (&gsi);
2278 else if (gimple_code (stmt) != GIMPLE_PHI)
2280 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2281 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2282 GSI_CONTINUE_LINKING);
2284 else
2286 gsi = gsi_after_labels (gimple_bb (stmt));
2287 if (!gsi_end_p (gsi))
2288 uid = gimple_uid (gsi_stmt (gsi));
2289 else
2291 gsi = gsi_start_bb (gimple_bb (stmt));
2292 uid = 1;
2293 while (!gsi_end_p (gsi))
2295 uid = gimple_uid (gsi_stmt (gsi));
2296 gsi_next (&gsi);
2299 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2300 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2301 GSI_SAME_STMT);
2302 if (gsi_end_p (gsi))
2303 gsi = gsi_last_bb (gimple_bb (stmt));
2304 else
2305 gsi_prev (&gsi);
2307 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2308 if (gimple_uid (gsi_stmt (gsi)))
2309 break;
2310 else
2311 gimple_set_uid (gsi_stmt (gsi), uid);
2313 oe->op = tem;
2314 range->exp = exp;
2315 range->low = low;
2316 range->high = high;
2317 range->in_p = in_p;
2318 range->strict_overflow_p = false;
2320 for (i = 0; i < count; i++)
2322 if (otherrange)
2323 range = otherrange + i;
2324 else
2325 range = otherrangep[i];
2326 oe = (*ops)[range->idx];
2327 /* Now change all the other range test immediate uses, so that
2328 those tests will be optimized away. */
2329 if (opcode == ERROR_MARK)
2331 if (oe->op)
2332 oe->op = build_int_cst (TREE_TYPE (oe->op),
2333 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2334 else
2335 oe->op = (oe->rank == BIT_IOR_EXPR
2336 ? boolean_false_node : boolean_true_node);
2338 else
2339 oe->op = error_mark_node;
2340 range->exp = NULL_TREE;
2342 return true;
2345 /* Optimize X == CST1 || X == CST2
2346 if popcount (CST1 ^ CST2) == 1 into
2347 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2348 Similarly for ranges. E.g.
2349 X != 2 && X != 3 && X != 10 && X != 11
2350 will be transformed by the previous optimization into
2351 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2352 and this loop can transform that into
2353 !(((X & ~8) - 2U) <= 1U). */
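/* A small worked instance, for illustration only: for X == 4 || X == 5,
   CST1 ^ CST2 is 1, a power of two, so the test becomes
   (X & ~1) == (4 & ~1), i.e. (X & ~1) == 4, which is true exactly for
   X in {4, 5}.  */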
2355 static bool
2356 optimize_range_tests_xor (enum tree_code opcode, tree type,
2357 tree lowi, tree lowj, tree highi, tree highj,
2358 vec<operand_entry *> *ops,
2359 struct range_entry *rangei,
2360 struct range_entry *rangej)
2362 tree lowxor, highxor, tem, exp;
2363 /* Check lowi ^ lowj == highi ^ highj and
2364 popcount (lowi ^ lowj) == 1. */
2365 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2366 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2367 return false;
2368 if (!integer_pow2p (lowxor))
2369 return false;
2370 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2371 if (!tree_int_cst_equal (lowxor, highxor))
2372 return false;
2374 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2375 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2376 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2377 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2378 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2379 NULL, rangei->in_p, lowj, highj,
2380 rangei->strict_overflow_p
2381 || rangej->strict_overflow_p))
2382 return true;
2383 return false;
2386 /* Optimize X == CST1 || X == CST2
2387 if popcount (CST2 - CST1) == 1 into
2388 ((X - CST1) & ~(CST2 - CST1)) == 0.
2389 Similarly for ranges. E.g.
2390 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2391 || X == 75 || X == 45
2392 will be transformed by the previous optimization into
2393 (X - 43U) <= 3U || (X - 75U) <= 3U
2394 and this loop can transform that into
2395 ((X - 43U) & ~(75U - 43U)) <= 3U. */
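/* A small worked instance, for illustration only: for X == 4 || X == 12,
   CST2 - CST1 is 8, a power of two, so the test becomes
   ((X - 4) & ~8) == 0 in the unsigned type, which is true exactly for
   X in {4, 12}.  */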
2396 static bool
2397 optimize_range_tests_diff (enum tree_code opcode, tree type,
2398 tree lowi, tree lowj, tree highi, tree highj,
2399 vec<operand_entry *> *ops,
2400 struct range_entry *rangei,
2401 struct range_entry *rangej)
2403 tree tem1, tem2, mask;
2404 /* Check highi - lowi == highj - lowj. */
2405 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2406 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2407 return false;
2408 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2409 if (!tree_int_cst_equal (tem1, tem2))
2410 return false;
2411 /* Check popcount (lowj - lowi) == 1. */
2412 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2413 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2414 return false;
2415 if (!integer_pow2p (tem1))
2416 return false;
2418 type = unsigned_type_for (type);
2419 tem1 = fold_convert (type, tem1);
2420 tem2 = fold_convert (type, tem2);
2421 lowi = fold_convert (type, lowi);
2422 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2423 tem1 = fold_binary (MINUS_EXPR, type,
2424 fold_convert (type, rangei->exp), lowi);
2425 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2426 lowj = build_int_cst (type, 0);
2427 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2428 NULL, rangei->in_p, lowj, tem2,
2429 rangei->strict_overflow_p
2430 || rangej->strict_overflow_p))
2431 return true;
2432 return false;
2435 /* Performs the checks common to optimize_range_tests_xor and
2436 optimize_range_tests_diff.
2437 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2438 Else it calls optimize_range_tests_diff. */
2440 static bool
2441 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2442 bool optimize_xor, vec<operand_entry *> *ops,
2443 struct range_entry *ranges)
2445 int i, j;
2446 bool any_changes = false;
2447 for (i = first; i < length; i++)
2449 tree lowi, highi, lowj, highj, type, tem;
2451 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2452 continue;
2453 type = TREE_TYPE (ranges[i].exp);
2454 if (!INTEGRAL_TYPE_P (type))
2455 continue;
2456 lowi = ranges[i].low;
2457 if (lowi == NULL_TREE)
2458 lowi = TYPE_MIN_VALUE (type);
2459 highi = ranges[i].high;
2460 if (highi == NULL_TREE)
2461 continue;
2462 for (j = i + 1; j < length && j < i + 64; j++)
2464 bool changes;
2465 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2466 continue;
2467 lowj = ranges[j].low;
2468 if (lowj == NULL_TREE)
2469 continue;
2470 highj = ranges[j].high;
2471 if (highj == NULL_TREE)
2472 highj = TYPE_MAX_VALUE (type);
2473 /* Check lowj > highi. */
2474 tem = fold_binary (GT_EXPR, boolean_type_node,
2475 lowj, highi);
2476 if (tem == NULL_TREE || !integer_onep (tem))
2477 continue;
2478 if (optimize_xor)
2479 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2480 highi, highj, ops,
2481 ranges + i, ranges + j);
2482 else
2483 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2484 highi, highj, ops,
2485 ranges + i, ranges + j);
2486 if (changes)
2488 any_changes = true;
2489 break;
2493 return any_changes;
2496 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2497 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2498 EXP on success, NULL otherwise. */
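/* For illustration (values assumed, not from a real testcase): with
   prec == 64 and TOTALLOW == LOW == 43, the range [43, 46] yields
   max == 4 and a mask with bits 0..3 set (0xf).  A later range
   [75, 78] tested against the same TOTALLOW of 43 yields the same 0xf
   mask shifted left by 75 - 43 == 32 positions, i.e. 0xf00000000.  */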
2500 static tree
2501 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2502 wide_int *mask, tree *totallowp)
2504 tree tem = int_const_binop (MINUS_EXPR, high, low);
2505 if (tem == NULL_TREE
2506 || TREE_CODE (tem) != INTEGER_CST
2507 || TREE_OVERFLOW (tem)
2508 || tree_int_cst_sgn (tem) == -1
2509 || compare_tree_int (tem, prec) != -1)
2510 return NULL_TREE;
2512 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2513 *mask = wi::shifted_mask (0, max, false, prec);
2514 if (TREE_CODE (exp) == BIT_AND_EXPR
2515 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2517 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2518 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2519 if (wi::popcount (msk) == 1
2520 && wi::ltu_p (msk, prec - max))
2522 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2523 max += msk.to_uhwi ();
2524 exp = TREE_OPERAND (exp, 0);
2525 if (integer_zerop (low)
2526 && TREE_CODE (exp) == PLUS_EXPR
2527 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2529 tree ret = TREE_OPERAND (exp, 0);
2530 STRIP_NOPS (ret);
2531 widest_int bias
2532 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2533 TYPE_PRECISION (TREE_TYPE (low))));
2534 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2535 if (totallowp)
2537 *totallowp = tbias;
2538 return ret;
2540 else if (!tree_int_cst_lt (totallow, tbias))
2541 return NULL_TREE;
2542 bias = wi::to_widest (tbias);
2543 bias -= wi::to_widest (totallow);
2544 if (bias >= 0 && bias < prec - max)
2546 *mask = wi::lshift (*mask, bias);
2547 return ret;
2552 if (totallowp)
2553 return exp;
2554 if (!tree_int_cst_lt (totallow, low))
2555 return exp;
2556 tem = int_const_binop (MINUS_EXPR, low, totallow);
2557 if (tem == NULL_TREE
2558 || TREE_CODE (tem) != INTEGER_CST
2559 || TREE_OVERFLOW (tem)
2560 || compare_tree_int (tem, prec - max) == 1)
2561 return NULL_TREE;
2563 *mask = wi::lshift (*mask, wi::to_widest (tem));
2564 return exp;
2567 /* Attempt to optimize small range tests using bit test.
2568 E.g.
2569 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2570 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2571 has been optimized by earlier optimizations into:
2572 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2573 As the whole 43 through 82 range spans fewer than 64 values,
2574 for 64-bit word targets optimize that into:
2575 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
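/* The underlying idiom, sketched for illustration (names made up):
   membership of X in a set whose values span fewer than 64 can be
   tested on a 64-bit target roughly as
     unsigned long long d = (unsigned long long) X - lo;
     in_set = d <= span && (((unsigned long long) 1 << d) & mask) != 0;
   where mask has bit (value - lo) set for each value in the set; the
   expression above is the complementary, not-in-set form.  */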
2577 static bool
2578 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2579 vec<operand_entry *> *ops,
2580 struct range_entry *ranges)
2582 int i, j;
2583 bool any_changes = false;
2584 int prec = GET_MODE_BITSIZE (word_mode);
2585 auto_vec<struct range_entry *, 64> candidates;
2587 for (i = first; i < length - 2; i++)
2589 tree lowi, highi, lowj, highj, type;
2591 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2592 continue;
2593 type = TREE_TYPE (ranges[i].exp);
2594 if (!INTEGRAL_TYPE_P (type))
2595 continue;
2596 lowi = ranges[i].low;
2597 if (lowi == NULL_TREE)
2598 lowi = TYPE_MIN_VALUE (type);
2599 highi = ranges[i].high;
2600 if (highi == NULL_TREE)
2601 continue;
2602 wide_int mask;
2603 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2604 highi, &mask, &lowi);
2605 if (exp == NULL_TREE)
2606 continue;
2607 bool strict_overflow_p = ranges[i].strict_overflow_p;
2608 candidates.truncate (0);
2609 int end = MIN (i + 64, length);
2610 for (j = i + 1; j < end; j++)
2612 tree exp2;
2613 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2614 continue;
2615 if (ranges[j].exp == exp)
2617 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2619 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2620 if (exp2 == exp)
2622 else if (TREE_CODE (exp2) == PLUS_EXPR)
2624 exp2 = TREE_OPERAND (exp2, 0);
2625 STRIP_NOPS (exp2);
2626 if (exp2 != exp)
2627 continue;
2629 else
2630 continue;
2632 else
2633 continue;
2634 lowj = ranges[j].low;
2635 if (lowj == NULL_TREE)
2636 continue;
2637 highj = ranges[j].high;
2638 if (highj == NULL_TREE)
2639 highj = TYPE_MAX_VALUE (type);
2640 wide_int mask2;
2641 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2642 highj, &mask2, NULL);
2643 if (exp2 != exp)
2644 continue;
2645 mask |= mask2;
2646 strict_overflow_p |= ranges[j].strict_overflow_p;
2647 candidates.safe_push (&ranges[j]);
2650 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2651 if (candidates.length () >= 2)
2653 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2654 wi::to_widest (lowi)
2655 + prec - 1 - wi::clz (mask));
2656 operand_entry *oe = (*ops)[ranges[i].idx];
2657 tree op = oe->op;
2658 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2659 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2660 location_t loc = gimple_location (stmt);
2661 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2663 /* See if it isn't cheaper to pretend the minimum value of the
2664 range is 0, if the maximum value is small enough.
2665 We can then avoid subtracting the minimum value, but the
2666 mask constant might be more expensive.
2667 if (compare_tree_int (lowi, 0) > 0
2668 && compare_tree_int (high, prec) < 0)
2670 int cost_diff;
2671 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2672 rtx reg = gen_raw_REG (word_mode, 10000);
2673 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2674 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2675 GEN_INT (-m)), speed_p);
2676 rtx r = immed_wide_int_const (mask, word_mode);
2677 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2678 word_mode, speed_p);
2679 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2680 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2681 word_mode, speed_p);
2682 if (cost_diff > 0)
2684 mask = wi::lshift (mask, m);
2685 lowi = build_zero_cst (TREE_TYPE (lowi));
2689 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2690 false, lowi, high);
2691 if (tem == NULL_TREE || is_gimple_val (tem))
2692 continue;
2693 tree etype = unsigned_type_for (TREE_TYPE (exp));
2694 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2695 fold_convert_loc (loc, etype, exp),
2696 fold_convert_loc (loc, etype, lowi));
2697 exp = fold_convert_loc (loc, integer_type_node, exp);
2698 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2699 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2700 build_int_cst (word_type, 1), exp);
2701 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2702 wide_int_to_tree (word_type, mask));
2703 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2704 build_zero_cst (word_type));
2705 if (is_gimple_val (exp))
2706 continue;
2708 /* The shift might have undefined behavior if TEM is true,
2709 but reassociate_bb isn't prepared to have basic blocks
2710 split when it is running. So, temporarily emit code
2711 with BIT_IOR_EXPR instead of &&, and fix it up in
2712 branch_fixup. */
2713 gimple_seq seq;
2714 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2715 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2716 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2717 gimple_seq seq2;
2718 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2719 gimple_seq_add_seq_without_update (&seq, seq2);
2720 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2721 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2722 gimple *g = gimple_build_assign (make_ssa_name (optype),
2723 BIT_IOR_EXPR, tem, exp);
2724 gimple_set_location (g, loc);
2725 gimple_seq_add_stmt_without_update (&seq, g);
2726 exp = gimple_assign_lhs (g);
2727 tree val = build_zero_cst (optype);
2728 if (update_range_test (&ranges[i], NULL, candidates.address (),
2729 candidates.length (), opcode, ops, exp,
2730 seq, false, val, val, strict_overflow_p))
2732 any_changes = true;
2733 reassoc_branch_fixups.safe_push (tem);
2735 else
2736 gimple_seq_discard (seq);
2739 return any_changes;
2742 /* Optimize range tests, similarly to how fold_range_test optimizes
2743 them on trees. The tree code for the binary
2744 operation between all the operands is OPCODE.
2745 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2746 maybe_optimize_range_tests for inter-bb range optimization.
2747 In that case if oe->op is NULL, oe->id is bb->index whose
2748 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2749 the actual opcode. */
2751 static bool
2752 optimize_range_tests (enum tree_code opcode,
2753 vec<operand_entry *> *ops)
2755 unsigned int length = ops->length (), i, j, first;
2756 operand_entry *oe;
2757 struct range_entry *ranges;
2758 bool any_changes = false;
2760 if (length == 1)
2761 return false;
2763 ranges = XNEWVEC (struct range_entry, length);
2764 for (i = 0; i < length; i++)
2766 oe = (*ops)[i];
2767 ranges[i].idx = i;
2768 init_range_entry (ranges + i, oe->op,
2769 oe->op
2770 ? NULL
2771 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2772 /* For |, invert it now; we will invert it again before emitting
2773 the optimized expression. */
2774 if (opcode == BIT_IOR_EXPR
2775 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2776 ranges[i].in_p = !ranges[i].in_p;
2779 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2780 for (i = 0; i < length; i++)
2781 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2782 break;
2784 /* Try to merge ranges. */
2785 for (first = i; i < length; i++)
2787 tree low = ranges[i].low;
2788 tree high = ranges[i].high;
2789 int in_p = ranges[i].in_p;
2790 bool strict_overflow_p = ranges[i].strict_overflow_p;
2791 int update_fail_count = 0;
2793 for (j = i + 1; j < length; j++)
2795 if (ranges[i].exp != ranges[j].exp)
2796 break;
2797 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2798 ranges[j].in_p, ranges[j].low, ranges[j].high))
2799 break;
2800 strict_overflow_p |= ranges[j].strict_overflow_p;
2803 if (j == i + 1)
2804 continue;
2806 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2807 opcode, ops, ranges[i].exp, NULL, in_p,
2808 low, high, strict_overflow_p))
2810 i = j - 1;
2811 any_changes = true;
2813 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2814 while update_range_test would fail. */
2815 else if (update_fail_count == 64)
2816 i = j - 1;
2817 else
2818 ++update_fail_count;
2821 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2822 ops, ranges);
2824 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2825 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2826 ops, ranges);
2827 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2828 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2829 ops, ranges);
2831 if (any_changes && opcode != ERROR_MARK)
2833 j = 0;
2834 FOR_EACH_VEC_ELT (*ops, i, oe)
2836 if (oe->op == error_mark_node)
2837 continue;
2838 else if (i != j)
2839 (*ops)[j] = oe;
2840 j++;
2842 ops->truncate (j);
2845 XDELETEVEC (ranges);
2846 return any_changes;
2849 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2850 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2851 otherwise the comparison code. */
2853 static tree_code
2854 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2856 if (TREE_CODE (var) != SSA_NAME)
2857 return ERROR_MARK;
2859 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2860 if (stmt == NULL)
2861 return ERROR_MARK;
2863 /* ??? If we start creating more COND_EXPR, we could perform
2864 this same optimization with them. For now, simplify. */
2865 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2866 return ERROR_MARK;
2868 tree cond = gimple_assign_rhs1 (stmt);
2869 tree_code cmp = TREE_CODE (cond);
2870 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2871 return ERROR_MARK;
2873 /* ??? For now, allow only canonical true and false result vectors.
2874 We could expand this to other constants should the need arise,
2875 but at the moment we don't create them. */
2876 tree t = gimple_assign_rhs2 (stmt);
2877 tree f = gimple_assign_rhs3 (stmt);
2878 bool inv;
2879 if (integer_all_onesp (t))
2880 inv = false;
2881 else if (integer_all_onesp (f))
2883 cmp = invert_tree_comparison (cmp, false);
2884 inv = true;
2886 else
2887 return ERROR_MARK;
2888 if (!integer_zerop (f))
2889 return ERROR_MARK;
2891 /* Success! */
2892 if (rets)
2893 *rets = stmt;
2894 if (reti)
2895 *reti = inv;
2896 return cmp;
2899 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2900 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
2902 static bool
2903 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2905 unsigned int length = ops->length (), i, j;
2906 bool any_changes = false;
2908 if (length == 1)
2909 return false;
2911 for (i = 0; i < length; ++i)
2913 tree elt0 = (*ops)[i]->op;
2915 gassign *stmt0;
2916 bool invert;
2917 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2918 if (cmp0 == ERROR_MARK)
2919 continue;
2921 for (j = i + 1; j < length; ++j)
2923 tree &elt1 = (*ops)[j]->op;
2925 gassign *stmt1;
2926 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2927 if (cmp1 == ERROR_MARK)
2928 continue;
2930 tree cond0 = gimple_assign_rhs1 (stmt0);
2931 tree x0 = TREE_OPERAND (cond0, 0);
2932 tree y0 = TREE_OPERAND (cond0, 1);
2934 tree cond1 = gimple_assign_rhs1 (stmt1);
2935 tree x1 = TREE_OPERAND (cond1, 0);
2936 tree y1 = TREE_OPERAND (cond1, 1);
2938 tree comb;
2939 if (opcode == BIT_AND_EXPR)
2940 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2941 else if (opcode == BIT_IOR_EXPR)
2942 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2943 else
2944 gcc_unreachable ();
2945 if (comb == NULL)
2946 continue;
2948 /* Success! */
2949 if (dump_file && (dump_flags & TDF_DETAILS))
2951 fprintf (dump_file, "Transforming ");
2952 print_generic_expr (dump_file, cond0, 0);
2953 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2954 print_generic_expr (dump_file, cond1, 0);
2955 fprintf (dump_file, " into ");
2956 print_generic_expr (dump_file, comb, 0);
2957 fputc ('\n', dump_file);
2960 gimple_assign_set_rhs1 (stmt0, comb);
2961 if (invert)
2962 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2963 *gimple_assign_rhs3_ptr (stmt0));
2964 update_stmt (stmt0);
2966 elt1 = error_mark_node;
2967 any_changes = true;
2971 if (any_changes)
2973 operand_entry *oe;
2974 j = 0;
2975 FOR_EACH_VEC_ELT (*ops, i, oe)
2977 if (oe->op == error_mark_node)
2978 continue;
2979 else if (i != j)
2980 (*ops)[j] = oe;
2981 j++;
2983 ops->truncate (j);
2986 return any_changes;
2989 /* Return true if STMT is a cast like:
2990 <bb N>:
2992 _123 = (int) _234;
2994 <bb M>:
2995 # _345 = PHI <_123(N), 1(...), 1(...)>
2996 where _234 has bool type, _123 has a single use and
2997 bb N has a single successor M. This is commonly used in
2998 the last block of a range test. */
3000 static bool
3001 final_range_test_p (gimple *stmt)
3003 basic_block bb, rhs_bb;
3004 edge e;
3005 tree lhs, rhs;
3006 use_operand_p use_p;
3007 gimple *use_stmt;
3009 if (!gimple_assign_cast_p (stmt))
3010 return false;
3011 bb = gimple_bb (stmt);
3012 if (!single_succ_p (bb))
3013 return false;
3014 e = single_succ_edge (bb);
3015 if (e->flags & EDGE_COMPLEX)
3016 return false;
3018 lhs = gimple_assign_lhs (stmt);
3019 rhs = gimple_assign_rhs1 (stmt);
3020 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3021 || TREE_CODE (rhs) != SSA_NAME
3022 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3023 return false;
3025 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3026 if (!single_imm_use (lhs, &use_p, &use_stmt))
3027 return false;
3029 if (gimple_code (use_stmt) != GIMPLE_PHI
3030 || gimple_bb (use_stmt) != e->dest)
3031 return false;
3033 /* And that the rhs is defined in the same loop. */
3034 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3035 if (rhs_bb == NULL
3036 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3037 return false;
3039 return true;
3042 /* Return true if BB is suitable basic block for inter-bb range test
3043 optimization. If BACKWARD is true, BB should be the only predecessor
3044 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3045 or is compared against to find a common basic block to which all conditions
3046 branch if true resp. false. If BACKWARD is false, TEST_BB should
3047 be the only predecessor of BB. */
3049 static bool
3050 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3051 bool backward)
3053 edge_iterator ei, ei2;
3054 edge e, e2;
3055 gimple *stmt;
3056 gphi_iterator gsi;
3057 bool other_edge_seen = false;
3058 bool is_cond;
3060 if (test_bb == bb)
3061 return false;
3062 /* Check last stmt first. */
3063 stmt = last_stmt (bb);
3064 if (stmt == NULL
3065 || (gimple_code (stmt) != GIMPLE_COND
3066 && (backward || !final_range_test_p (stmt)))
3067 || gimple_visited_p (stmt)
3068 || stmt_could_throw_p (stmt)
3069 || *other_bb == bb)
3070 return false;
3071 is_cond = gimple_code (stmt) == GIMPLE_COND;
3072 if (is_cond)
3074 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3075 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3076 to *OTHER_BB (if not set yet, try to find it out). */
3077 if (EDGE_COUNT (bb->succs) != 2)
3078 return false;
3079 FOR_EACH_EDGE (e, ei, bb->succs)
3081 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3082 return false;
3083 if (e->dest == test_bb)
3085 if (backward)
3086 continue;
3087 else
3088 return false;
3090 if (e->dest == bb)
3091 return false;
3092 if (*other_bb == NULL)
3094 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3095 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3096 return false;
3097 else if (e->dest == e2->dest)
3098 *other_bb = e->dest;
3099 if (*other_bb == NULL)
3100 return false;
3102 if (e->dest == *other_bb)
3103 other_edge_seen = true;
3104 else if (backward)
3105 return false;
3107 if (*other_bb == NULL || !other_edge_seen)
3108 return false;
3110 else if (single_succ (bb) != *other_bb)
3111 return false;
3113 /* Now check all PHIs of *OTHER_BB. */
3114 e = find_edge (bb, *other_bb);
3115 e2 = find_edge (test_bb, *other_bb);
3116 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3118 gphi *phi = gsi.phi ();
3119 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3120 corresponding to BB and TEST_BB predecessor must be the same. */
3121 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3122 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3124 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3125 one of the PHIs should have the lhs of the last stmt in
3126 that block as PHI arg and that PHI should have 0 or 1
3127 corresponding to it in all other range test basic blocks
3128 considered. */
3129 if (!is_cond)
3131 if (gimple_phi_arg_def (phi, e->dest_idx)
3132 == gimple_assign_lhs (stmt)
3133 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3134 || integer_onep (gimple_phi_arg_def (phi,
3135 e2->dest_idx))))
3136 continue;
3138 else
3140 gimple *test_last = last_stmt (test_bb);
3141 if (gimple_code (test_last) != GIMPLE_COND
3142 && gimple_phi_arg_def (phi, e2->dest_idx)
3143 == gimple_assign_lhs (test_last)
3144 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3145 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3146 continue;
3149 return false;
3152 return true;
3155 /* Return true if BB doesn't have side-effects that would disallow
3156 range test optimization, all SSA_NAMEs set in the bb are consumed
3157 in the bb and there are no PHIs. */
3159 static bool
3160 no_side_effect_bb (basic_block bb)
3162 gimple_stmt_iterator gsi;
3163 gimple *last;
3165 if (!gimple_seq_empty_p (phi_nodes (bb)))
3166 return false;
3167 last = last_stmt (bb);
3168 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3170 gimple *stmt = gsi_stmt (gsi);
3171 tree lhs;
3172 imm_use_iterator imm_iter;
3173 use_operand_p use_p;
3175 if (is_gimple_debug (stmt))
3176 continue;
3177 if (gimple_has_side_effects (stmt))
3178 return false;
3179 if (stmt == last)
3180 return true;
3181 if (!is_gimple_assign (stmt))
3182 return false;
3183 lhs = gimple_assign_lhs (stmt);
3184 if (TREE_CODE (lhs) != SSA_NAME)
3185 return false;
3186 if (gimple_assign_rhs_could_trap_p (stmt))
3187 return false;
3188 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3190 gimple *use_stmt = USE_STMT (use_p);
3191 if (is_gimple_debug (use_stmt))
3192 continue;
3193 if (gimple_bb (use_stmt) != bb)
3194 return false;
3197 return false;
3200 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3201 return true and fill in *OPS recursively. */
3203 static bool
3204 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3205 struct loop *loop)
3207 gimple *stmt = SSA_NAME_DEF_STMT (var);
3208 tree rhs[2];
3209 int i;
3211 if (!is_reassociable_op (stmt, code, loop))
3212 return false;
3214 rhs[0] = gimple_assign_rhs1 (stmt);
3215 rhs[1] = gimple_assign_rhs2 (stmt);
3216 gimple_set_visited (stmt, true);
3217 for (i = 0; i < 2; i++)
3218 if (TREE_CODE (rhs[i]) == SSA_NAME
3219 && !get_ops (rhs[i], code, ops, loop)
3220 && has_single_use (rhs[i]))
3222 operand_entry *oe = operand_entry_pool.allocate ();
3224 oe->op = rhs[i];
3225 oe->rank = code;
3226 oe->id = 0;
3227 oe->count = 1;
3228 ops->safe_push (oe);
3230 return true;
3233 /* Find the ops that were added by get_ops starting from VAR, see if
3234 they were changed during update_range_test and if yes, create new
3235 stmts. */
3237 static tree
3238 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3239 unsigned int *pidx, struct loop *loop)
3241 gimple *stmt = SSA_NAME_DEF_STMT (var);
3242 tree rhs[4];
3243 int i;
3245 if (!is_reassociable_op (stmt, code, loop))
3246 return NULL;
3248 rhs[0] = gimple_assign_rhs1 (stmt);
3249 rhs[1] = gimple_assign_rhs2 (stmt);
3250 rhs[2] = rhs[0];
3251 rhs[3] = rhs[1];
3252 for (i = 0; i < 2; i++)
3253 if (TREE_CODE (rhs[i]) == SSA_NAME)
3255 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3256 if (rhs[2 + i] == NULL_TREE)
3258 if (has_single_use (rhs[i]))
3259 rhs[2 + i] = ops[(*pidx)++]->op;
3260 else
3261 rhs[2 + i] = rhs[i];
3264 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3265 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3267 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3268 var = make_ssa_name (TREE_TYPE (var));
3269 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3270 rhs[2], rhs[3]);
3271 gimple_set_uid (g, gimple_uid (stmt));
3272 gimple_set_visited (g, true);
3273 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3275 return var;
3278 /* Structure to track the initial value passed to get_ops and
3279 the range in the ops vector for each basic block. */
3281 struct inter_bb_range_test_entry
3283 tree op;
3284 unsigned int first_idx, last_idx;
3287 /* Inter-bb range test optimization.
3289 Returns TRUE if a gimple conditional is optimized to a constant true/false,
3290 otherwise returns FALSE.
3292 This indicates to the caller that it should run a CFG cleanup pass
3293 once reassociation is completed. */
3295 static bool
3296 maybe_optimize_range_tests (gimple *stmt)
3298 basic_block first_bb = gimple_bb (stmt);
3299 basic_block last_bb = first_bb;
3300 basic_block other_bb = NULL;
3301 basic_block bb;
3302 edge_iterator ei;
3303 edge e;
3304 auto_vec<operand_entry *> ops;
3305 auto_vec<inter_bb_range_test_entry> bbinfo;
3306 bool any_changes = false;
3307 bool cfg_cleanup_needed = false;
3309 /* Consider only basic blocks that end with GIMPLE_COND or
3310 a cast statement satisfying final_range_test_p. All
3311 but the last bb in the first_bb .. last_bb range
3312 should end with GIMPLE_COND. */
3313 if (gimple_code (stmt) == GIMPLE_COND)
3315 if (EDGE_COUNT (first_bb->succs) != 2)
3316 return cfg_cleanup_needed;
3318 else if (final_range_test_p (stmt))
3319 other_bb = single_succ (first_bb);
3320 else
3321 return cfg_cleanup_needed;
3323 if (stmt_could_throw_p (stmt))
3324 return cfg_cleanup_needed;
3326 /* As relative ordering of post-dominator sons isn't fixed,
3327 maybe_optimize_range_tests can be called first on any
3328 bb in the range we want to optimize. So, start searching
3329 backwards, if first_bb can be set to a predecessor. */
3330 while (single_pred_p (first_bb))
3332 basic_block pred_bb = single_pred (first_bb);
3333 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3334 break;
3335 if (!no_side_effect_bb (first_bb))
3336 break;
3337 first_bb = pred_bb;
3339 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3340 Before starting forward search in last_bb successors, find
3341 out the other_bb. */
3342 if (first_bb == last_bb)
3344 other_bb = NULL;
3345 /* As non-GIMPLE_COND last stmt always terminates the range,
3346 if forward search didn't discover anything, just give up. */
3347 if (gimple_code (stmt) != GIMPLE_COND)
3348 return cfg_cleanup_needed;
3349 /* Look at both successors. Either it ends with a GIMPLE_COND
3350 and satisfies suitable_cond_bb, or ends with a cast and
3351 other_bb is that cast's successor. */
3352 FOR_EACH_EDGE (e, ei, first_bb->succs)
3353 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3354 || e->dest == first_bb)
3355 return cfg_cleanup_needed;
3356 else if (single_pred_p (e->dest))
3358 stmt = last_stmt (e->dest);
3359 if (stmt
3360 && gimple_code (stmt) == GIMPLE_COND
3361 && EDGE_COUNT (e->dest->succs) == 2)
3363 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3364 break;
3365 else
3366 other_bb = NULL;
3368 else if (stmt
3369 && final_range_test_p (stmt)
3370 && find_edge (first_bb, single_succ (e->dest)))
3372 other_bb = single_succ (e->dest);
3373 if (other_bb == first_bb)
3374 other_bb = NULL;
3377 if (other_bb == NULL)
3378 return cfg_cleanup_needed;
3380 /* Now do the forward search, moving last_bb to successor bbs
3381 that aren't other_bb. */
3382 while (EDGE_COUNT (last_bb->succs) == 2)
3384 FOR_EACH_EDGE (e, ei, last_bb->succs)
3385 if (e->dest != other_bb)
3386 break;
3387 if (e == NULL)
3388 break;
3389 if (!single_pred_p (e->dest))
3390 break;
3391 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3392 break;
3393 if (!no_side_effect_bb (e->dest))
3394 break;
3395 last_bb = e->dest;
3397 if (first_bb == last_bb)
3398 return cfg_cleanup_needed;
3399 /* Here basic blocks first_bb through last_bb's predecessor
3400 end with GIMPLE_COND, all of them have one of the edges to
3401 other_bb and another to another block in the range,
3402 all blocks except first_bb don't have side-effects and
3403 last_bb ends with either GIMPLE_COND, or cast satisfying
3404 final_range_test_p. */
3405 for (bb = last_bb; ; bb = single_pred (bb))
3407 enum tree_code code;
3408 tree lhs, rhs;
3409 inter_bb_range_test_entry bb_ent;
3411 bb_ent.op = NULL_TREE;
3412 bb_ent.first_idx = ops.length ();
3413 bb_ent.last_idx = bb_ent.first_idx;
3414 e = find_edge (bb, other_bb);
3415 stmt = last_stmt (bb);
3416 gimple_set_visited (stmt, true);
3417 if (gimple_code (stmt) != GIMPLE_COND)
3419 use_operand_p use_p;
3420 gimple *phi;
3421 edge e2;
3422 unsigned int d;
3424 lhs = gimple_assign_lhs (stmt);
3425 rhs = gimple_assign_rhs1 (stmt);
3426 gcc_assert (bb == last_bb);
3428 /* stmt is
3429 _123 = (int) _234;
3431 followed by:
3432 <bb M>:
3433 # _345 = PHI <_123(N), 1(...), 1(...)>
3435 or 0 instead of 1. If it is 0, the _234
3436 range test is anded together with all the
3437 other range tests, if it is 1, it is ored with
3438 them. */
3439 single_imm_use (lhs, &use_p, &phi);
3440 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3441 e2 = find_edge (first_bb, other_bb);
3442 d = e2->dest_idx;
3443 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3444 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3445 code = BIT_AND_EXPR;
3446 else
3448 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3449 code = BIT_IOR_EXPR;
3452 /* If _234 SSA_NAME_DEF_STMT is
3453 _234 = _567 | _789;
3454 (or &, corresponding to 1/0 in the phi arguments),
3455 push into ops the individual range test arguments
3456 of the bitwise or resp. and, recursively. */
3457 if (!get_ops (rhs, code, &ops,
3458 loop_containing_stmt (stmt))
3459 && has_single_use (rhs))
3461 /* Otherwise, push the _234 range test itself. */
3462 operand_entry *oe = operand_entry_pool.allocate ();
3464 oe->op = rhs;
3465 oe->rank = code;
3466 oe->id = 0;
3467 oe->count = 1;
3468 ops.safe_push (oe);
3469 bb_ent.last_idx++;
3471 else
3472 bb_ent.last_idx = ops.length ();
3473 bb_ent.op = rhs;
3474 bbinfo.safe_push (bb_ent);
3475 continue;
3477 /* Otherwise stmt is GIMPLE_COND. */
3478 code = gimple_cond_code (stmt);
3479 lhs = gimple_cond_lhs (stmt);
3480 rhs = gimple_cond_rhs (stmt);
3481 if (TREE_CODE (lhs) == SSA_NAME
3482 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3483 && ((code != EQ_EXPR && code != NE_EXPR)
3484 || rhs != boolean_false_node
3485 /* Either push into ops the individual bitwise
3486 or resp. and operands, depending on which
3487 edge is other_bb. */
3488 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3489 ^ (code == EQ_EXPR))
3490 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3491 loop_containing_stmt (stmt))))
3493 /* Or push the GIMPLE_COND stmt itself. */
3494 operand_entry *oe = operand_entry_pool.allocate ();
3496 oe->op = NULL;
3497 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3498 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3499 /* oe->op = NULL signals that there is no SSA_NAME
3500 for the range test, and oe->id instead is the
3501 basic block number, at the end of which the GIMPLE_COND
3502 is. */
3503 oe->id = bb->index;
3504 oe->count = 1;
3505 ops.safe_push (oe);
3506 bb_ent.op = NULL;
3507 bb_ent.last_idx++;
3509 else if (ops.length () > bb_ent.first_idx)
3511 bb_ent.op = lhs;
3512 bb_ent.last_idx = ops.length ();
3514 bbinfo.safe_push (bb_ent);
3515 if (bb == first_bb)
3516 break;
3518 if (ops.length () > 1)
3519 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3520 if (any_changes)
3522 unsigned int idx, max_idx = 0;
3523 /* update_ops relies on has_single_use predicates returning the
3524 same values as they did during get_ops earlier. Additionally it
3525 never removes statements, only adds new ones and it should walk
3526 from the single imm use and check the predicate already before
3527 making those changes.
3528 On the other hand, the handling of GIMPLE_COND directly can turn
3529 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3530 it needs to be done in a separate loop afterwards. */
3531 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3533 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3534 && bbinfo[idx].op != NULL_TREE)
3536 tree new_op;
3538 max_idx = idx;
3539 stmt = last_stmt (bb);
3540 new_op = update_ops (bbinfo[idx].op,
3541 (enum tree_code)
3542 ops[bbinfo[idx].first_idx]->rank,
3543 ops, &bbinfo[idx].first_idx,
3544 loop_containing_stmt (stmt));
3545 if (new_op == NULL_TREE)
3547 gcc_assert (bb == last_bb);
3548 new_op = ops[bbinfo[idx].first_idx++]->op;
3550 if (bbinfo[idx].op != new_op)
3552 imm_use_iterator iter;
3553 use_operand_p use_p;
3554 gimple *use_stmt, *cast_stmt = NULL;
3556 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3557 if (is_gimple_debug (use_stmt))
3558 continue;
3559 else if (gimple_code (use_stmt) == GIMPLE_COND
3560 || gimple_code (use_stmt) == GIMPLE_PHI)
3561 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3562 SET_USE (use_p, new_op);
3563 else if (gimple_assign_cast_p (use_stmt))
3564 cast_stmt = use_stmt;
3565 else
3566 gcc_unreachable ();
3567 if (cast_stmt)
3569 gcc_assert (bb == last_bb);
3570 tree lhs = gimple_assign_lhs (cast_stmt);
3571 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3572 enum tree_code rhs_code
3573 = gimple_assign_rhs_code (cast_stmt);
3574 gassign *g;
3575 if (is_gimple_min_invariant (new_op))
3577 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3578 g = gimple_build_assign (new_lhs, new_op);
3580 else
3581 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3582 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3583 gimple_set_uid (g, gimple_uid (cast_stmt));
3584 gimple_set_visited (g, true);
3585 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3586 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3587 if (is_gimple_debug (use_stmt))
3588 continue;
3589 else if (gimple_code (use_stmt) == GIMPLE_COND
3590 || gimple_code (use_stmt) == GIMPLE_PHI)
3591 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3592 SET_USE (use_p, new_lhs);
3593 else
3594 gcc_unreachable ();
3598 if (bb == first_bb)
3599 break;
3601 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3603 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3604 && bbinfo[idx].op == NULL_TREE
3605 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3607 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3609 if (idx > max_idx)
3610 max_idx = idx;
3612 /* If we collapse the conditional to a true/false
3613 condition, then bubble that knowledge up to our caller. */
3614 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3616 gimple_cond_make_false (cond_stmt);
3617 cfg_cleanup_needed = true;
3619 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3621 gimple_cond_make_true (cond_stmt);
3622 cfg_cleanup_needed = true;
3624 else
3626 gimple_cond_set_code (cond_stmt, NE_EXPR);
3627 gimple_cond_set_lhs (cond_stmt,
3628 ops[bbinfo[idx].first_idx]->op);
3629 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3631 update_stmt (cond_stmt);
3633 if (bb == first_bb)
3634 break;
3637 /* The above changes could result in basic blocks after the first
3638 modified one, up to and including last_bb, being executed even if
3639 they would not be in the original program. If the value ranges of
3640 assignment lhs' in those bbs were dependent on the conditions
3641 guarding those basic blocks which now can change, the VRs might
3642 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3643 are only used within the same bb, it should not be a big deal if
3644 we just reset all the VRs in those bbs. See PR68671. */
3645 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3646 reset_flow_sensitive_info_in_bb (bb);
3648 return cfg_cleanup_needed;
3651 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3652 of STMT in its operands. This is also known as a "destructive
3653 update" operation. */
3655 static bool
3656 is_phi_for_stmt (gimple *stmt, tree operand)
3658 gimple *def_stmt;
3659 gphi *def_phi;
3660 tree lhs;
3661 use_operand_p arg_p;
3662 ssa_op_iter i;
3664 if (TREE_CODE (operand) != SSA_NAME)
3665 return false;
3667 lhs = gimple_assign_lhs (stmt);
3669 def_stmt = SSA_NAME_DEF_STMT (operand);
3670 def_phi = dyn_cast <gphi *> (def_stmt);
3671 if (!def_phi)
3672 return false;
3674 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3675 if (lhs == USE_FROM_PTR (arg_p))
3676 return true;
3677 return false;
3680 /* Remove def stmt of VAR if VAR has zero uses and recurse
3681 on rhs1 operand if so. */
3683 static void
3684 remove_visited_stmt_chain (tree var)
3686 gimple *stmt;
3687 gimple_stmt_iterator gsi;
3689 while (1)
3691 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3692 return;
3693 stmt = SSA_NAME_DEF_STMT (var);
3694 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3696 var = gimple_assign_rhs1 (stmt);
3697 gsi = gsi_for_stmt (stmt);
3698 reassoc_remove_stmt (&gsi);
3699 release_defs (stmt);
3701 else
3702 return;
3706 /* This function checks three consecutive operands in
3707 the passed operands vector OPS starting from OPINDEX and
3708 swaps two operands if it is profitable for the binary operation
3709 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3711 We pair ops with the same rank if possible.
3713 The alternative we try is to see if STMT is a destructive
3714 update style statement, which is like:
3715 b = phi (a, ...)
3716 a = c + b;
3717 In that case, we want to use the destructive update form to
3718 expose the possible vectorizer sum reduction opportunity.
3719 In that case, the third operand will be the phi node. This
3720 check is not performed if STMT is null.
3722 We could, of course, try to be better as noted above, and do a
3723 lot of work to try to find these opportunities in >3 operand
3724 cases, but it is unlikely to be worth it. */
3726 static void
3727 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3728 unsigned int opindex, gimple *stmt)
3730 operand_entry *oe1, *oe2, *oe3;
3732 oe1 = ops[opindex];
3733 oe2 = ops[opindex + 1];
3734 oe3 = ops[opindex + 2];
3736 if ((oe1->rank == oe2->rank
3737 && oe2->rank != oe3->rank)
3738 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3739 && !is_phi_for_stmt (stmt, oe1->op)
3740 && !is_phi_for_stmt (stmt, oe2->op)))
3742 operand_entry temp = *oe3;
3743 oe3->op = oe1->op;
3744 oe3->rank = oe1->rank;
3745 oe1->op = temp.op;
3746 oe1->rank= temp.rank;
3748 else if ((oe1->rank == oe3->rank
3749 && oe2->rank != oe3->rank)
3750 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3751 && !is_phi_for_stmt (stmt, oe1->op)
3752 && !is_phi_for_stmt (stmt, oe3->op)))
3754 operand_entry temp = *oe2;
3755 oe2->op = oe1->op;
3756 oe2->rank = oe1->rank;
3757 oe1->op = temp.op;
3758 oe1->rank = temp.rank;
3762 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3763 two definitions, otherwise return STMT. */
3765 static inline gimple *
3766 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3768 if (TREE_CODE (rhs1) == SSA_NAME
3769 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3770 stmt = SSA_NAME_DEF_STMT (rhs1);
3771 if (TREE_CODE (rhs2) == SSA_NAME
3772 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3773 stmt = SSA_NAME_DEF_STMT (rhs2);
3774 return stmt;
3777 /* Recursively rewrite our linearized statements so that the operators
3778 match those in OPS[OPINDEX], putting the computation in rank
3779 order. Return new lhs. */
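/* For illustration (made-up SSA names, four operands in OPS, shown
   with + as the operation): the innermost statement of the rhs1 chain
   is rewritten as _1 = OPS[2] + OPS[3], the next one as
   _2 = _1 + OPS[1], and the root statement as _3 = _2 + OPS[0].  */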
3781 static tree
3782 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3783 vec<operand_entry *> ops, bool changed)
3785 tree rhs1 = gimple_assign_rhs1 (stmt);
3786 tree rhs2 = gimple_assign_rhs2 (stmt);
3787 tree lhs = gimple_assign_lhs (stmt);
3788 operand_entry *oe;
3790 /* The final recursion case for this function is that you have
3791 exactly two operations left.
3792 If we had exactly one op in the entire list to start with, we
3793 would have never called this function, and the tail recursion
3794 rewrites them one at a time. */
3795 if (opindex + 2 == ops.length ())
3797 operand_entry *oe1, *oe2;
3799 oe1 = ops[opindex];
3800 oe2 = ops[opindex + 1];
3802 if (rhs1 != oe1->op || rhs2 != oe2->op)
3804 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3805 unsigned int uid = gimple_uid (stmt);
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3809 fprintf (dump_file, "Transforming ");
3810 print_gimple_stmt (dump_file, stmt, 0, 0);
3813 /* Even when changed is false, reassociation could have e.g. removed
3814 some redundant operations, so unless we are just swapping the
3815 arguments or unless there is no change at all (then we just
3816 return lhs), force creation of a new SSA_NAME. */
3817 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3819 gimple *insert_point
3820 = find_insert_point (stmt, oe1->op, oe2->op);
3821 lhs = make_ssa_name (TREE_TYPE (lhs));
3822 stmt
3823 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3824 oe1->op, oe2->op);
3825 gimple_set_uid (stmt, uid);
3826 gimple_set_visited (stmt, true);
3827 if (insert_point == gsi_stmt (gsi))
3828 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3829 else
3830 insert_stmt_after (stmt, insert_point);
3832 else
3834 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3835 == stmt);
3836 gimple_assign_set_rhs1 (stmt, oe1->op);
3837 gimple_assign_set_rhs2 (stmt, oe2->op);
3838 update_stmt (stmt);
3841 if (rhs1 != oe1->op && rhs1 != oe2->op)
3842 remove_visited_stmt_chain (rhs1);
3844 if (dump_file && (dump_flags & TDF_DETAILS))
3846 fprintf (dump_file, " into ");
3847 print_gimple_stmt (dump_file, stmt, 0, 0);
3850 return lhs;
3853 /* If we hit here, we should have 3 or more ops left. */
3854 gcc_assert (opindex + 2 < ops.length ());
3856 /* Rewrite the next operator. */
3857 oe = ops[opindex];
3859 /* Recurse on the LHS of the binary operator, which is guaranteed to
3860 be the non-leaf side. */
3861 tree new_rhs1
3862 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3863 changed || oe->op != rhs2);
3865 if (oe->op != rhs2 || new_rhs1 != rhs1)
3867 if (dump_file && (dump_flags & TDF_DETAILS))
3869 fprintf (dump_file, "Transforming ");
3870 print_gimple_stmt (dump_file, stmt, 0, 0);
3873 /* If changed is false, this is either opindex == 0
3874 or all outer rhs2's were equal to corresponding oe->op,
3875 and powi_result is NULL.
3876 That means lhs is equivalent before and after reassociation.
3877 Otherwise ensure the old lhs SSA_NAME is not reused and
3878 create a new stmt as well, so that any debug stmts will be
3879 properly adjusted. */
3880 if (changed)
3882 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3883 unsigned int uid = gimple_uid (stmt);
3884 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3886 lhs = make_ssa_name (TREE_TYPE (lhs));
3887 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3888 new_rhs1, oe->op);
3889 gimple_set_uid (stmt, uid);
3890 gimple_set_visited (stmt, true);
3891 if (insert_point == gsi_stmt (gsi))
3892 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3893 else
3894 insert_stmt_after (stmt, insert_point);
3896 else
3898 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3899 == stmt);
3900 gimple_assign_set_rhs1 (stmt, new_rhs1);
3901 gimple_assign_set_rhs2 (stmt, oe->op);
3902 update_stmt (stmt);
3905 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, " into ");
3908 print_gimple_stmt (dump_file, stmt, 0, 0);
3911 return lhs;
3914 /* Find out how many cycles we need to compute the statement chain.
3915 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3916 maximum number of independent statements we may execute per cycle. */
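/* A rough worked instance of the formula below, for illustration only:
   with ops_num == 10 and cpu_width == 2, two full-width cycles reduce
   the count from 10 to 6 (res == 10 / 4 == 2, rest == 10 - 2 * 2 == 6),
   and the remaining 6 operands need ceil(log2(6)) == 3 halving cycles,
   for 5 cycles in total.  */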
3918 static int
3919 get_required_cycles (int ops_num, int cpu_width)
3921 int res;
3922 int elog;
3923 unsigned int rest;
3925 /* While we have more than 2 * cpu_width operands
3926 we may reduce the number of operands by cpu_width
3927 per cycle. */
3928 res = ops_num / (2 * cpu_width);
3930 /* The remaining operand count may be halved each cycle
3931 until we have only one operand. */
3932 rest = (unsigned)(ops_num - res * cpu_width);
3933 elog = exact_log2 (rest);
3934 if (elog >= 0)
3935 res += elog;
3936 else
3937 res += floor_log2 (rest) + 1;
3939 return res;
3942 /* Returns an optimal number of registers to use for computation of
3943 given statements. */
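/* For illustration (numbers assumed): with ops_num == 8 and a target
   width of 4, get_required_cycles (8, 4) is 3, while widths 3 and 2
   both need 4 cycles, so the binary search below settles on 4.  */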
3945 static int
3946 get_reassociation_width (int ops_num, enum tree_code opc,
3947 machine_mode mode)
3949 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3950 int width;
3951 int width_min;
3952 int cycles_best;
3954 if (param_width > 0)
3955 width = param_width;
3956 else
3957 width = targetm.sched.reassociation_width (opc, mode);
3959 if (width == 1)
3960 return width;
3962 /* Get the minimal time required for sequence computation. */
3963 cycles_best = get_required_cycles (ops_num, width);
3965 /* Check if we may use a smaller width and still compute the sequence in
3966 the same time. That allows us to reduce register usage.
3967 get_required_cycles is monotonically non-decreasing as the width shrinks,
3968 so we can perform a binary search for the minimal width that still
3969 results in the optimal cycle count. */
3970 width_min = 1;
3971 while (width > width_min)
3973 int width_mid = (width + width_min) / 2;
3975 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3976 width = width_mid;
3977 else if (width_min < width_mid)
3978 width_min = width_mid;
3979 else
3980 break;
3983 return width;
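/* Worked example (illustrative only): for ops_num = 6 and a target width
   of 4, get_required_cycles (6, 4) is 3.  The binary search above then
   finds that width 2 also needs only 3 cycles while width 1 needs 5, so
   a width of 2 is returned and register pressure is reduced.  */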
3986 /* Recursively rewrite our linearized statements so that the operators
3987 match those in OPS[OPINDEX], putting the computation in rank
3988 order and trying to allow operations to be executed in
3989 parallel. */
3991 static void
3992 rewrite_expr_tree_parallel (gassign *stmt, int width,
3993 vec<operand_entry *> ops)
3995 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3996 int op_num = ops.length ();
3997 int stmt_num = op_num - 1;
3998 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3999 int op_index = op_num - 1;
4000 int stmt_index = 0;
4001 int ready_stmts_end = 0;
4002 int i = 0;
4003 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4005 /* We start expression rewriting from the top statements.
4006 So, in this loop we create a full list of statements
4007 we will work with. */
4008 stmts[stmt_num - 1] = stmt;
4009 for (i = stmt_num - 2; i >= 0; i--)
4010 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4012 for (i = 0; i < stmt_num; i++)
4014 tree op1, op2;
4016 /* Determine whether we should use results of
4017 already handled statements or not. */
4018 if (ready_stmts_end == 0
4019 && (i - stmt_index >= width || op_index < 1))
4020 ready_stmts_end = i;
4022 /* Now we choose operands for the next statement. A nonzero
4023 value in ready_stmts_end means that we should use
4024 the results of already generated statements as new operands. */
4025 if (ready_stmts_end > 0)
4027 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4028 if (ready_stmts_end > stmt_index)
4029 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4030 else if (op_index >= 0)
4031 op2 = ops[op_index--]->op;
4032 else
4034 gcc_assert (stmt_index < i);
4035 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4038 if (stmt_index >= ready_stmts_end)
4039 ready_stmts_end = 0;
4041 else
4043 if (op_index > 1)
4044 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4045 op2 = ops[op_index--]->op;
4046 op1 = ops[op_index--]->op;
4049 /* If we are emitting the last statement, then put the
4050 operands into it. This will also
4051 terminate the loop. */
4052 if (op_index < 0 && stmt_index == i)
4053 i = stmt_num - 1;
4055 if (dump_file && (dump_flags & TDF_DETAILS))
4057 fprintf (dump_file, "Transforming ");
4058 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4061 /* We keep the original statement only for the last one; all
4062 others are recreated. */
4063 if (i == stmt_num - 1)
4065 gimple_assign_set_rhs1 (stmts[i], op1);
4066 gimple_assign_set_rhs2 (stmts[i], op2);
4067 update_stmt (stmts[i]);
4069 else
4070 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4072 if (dump_file && (dump_flags & TDF_DETAILS))
4074 fprintf (dump_file, " into ");
4075 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4079 remove_visited_stmt_chain (last_rhs1);
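/* Conceptual example for the function above (not from the original
   source): with four operands a, b, c, d and width 2, the serial chain
   ((a + b) + c) + d can instead be emitted roughly as
     t1 = a + b;  t2 = c + d;  result = t1 + t2;
   so that t1 and t2 may execute in the same cycle.  */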
4082 /* Transform STMT, which is really (A + B) + (C + D), into the left
4083 linear form, ((A + B) + C) + D.
4084 Recurse on D if necessary. */
4086 static void
4087 linearize_expr (gimple *stmt)
4089 gimple_stmt_iterator gsi;
4090 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4091 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4092 gimple *oldbinrhs = binrhs;
4093 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4094 gimple *newbinrhs = NULL;
4095 struct loop *loop = loop_containing_stmt (stmt);
4096 tree lhs = gimple_assign_lhs (stmt);
4098 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4099 && is_reassociable_op (binrhs, rhscode, loop));
4101 gsi = gsi_for_stmt (stmt);
4103 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4104 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4105 gimple_assign_rhs_code (binrhs),
4106 gimple_assign_lhs (binlhs),
4107 gimple_assign_rhs2 (binrhs));
4108 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4109 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4110 gimple_set_uid (binrhs, gimple_uid (stmt));
4112 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4113 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4115 if (dump_file && (dump_flags & TDF_DETAILS))
4117 fprintf (dump_file, "Linearized: ");
4118 print_gimple_stmt (dump_file, stmt, 0, 0);
4121 reassociate_stats.linearized++;
4122 update_stmt (stmt);
4124 gsi = gsi_for_stmt (oldbinrhs);
4125 reassoc_remove_stmt (&gsi);
4126 release_defs (oldbinrhs);
4128 gimple_set_visited (stmt, true);
4129 gimple_set_visited (binlhs, true);
4130 gimple_set_visited (binrhs, true);
4132 /* Tail recurse on the new rhs if it still needs reassociation. */
4133 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4134 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4135 want to change the algorithm while converting to tuples. */
4136 linearize_expr (stmt);
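/* Rough illustration of the function above (hedged, not a verbatim
   trace): given
     x = a + b;  y = c + d;  z = x + y;
   the statement z = x + y is rewritten so that one operand of y is
   folded into a freshly inserted statement, e.g.
     t = x + d;  z = t + c;
   yielding a left-linear chain; the remaining operand is handled by the
   tail recursion if its definition is still reassociable.  */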
4139 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4140 it. Otherwise, return NULL. */
4142 static gimple *
4143 get_single_immediate_use (tree lhs)
4145 use_operand_p immuse;
4146 gimple *immusestmt;
4148 if (TREE_CODE (lhs) == SSA_NAME
4149 && single_imm_use (lhs, &immuse, &immusestmt)
4150 && is_gimple_assign (immusestmt))
4151 return immusestmt;
4153 return NULL;
4156 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4157 representing the negated value. Insertions of any necessary
4158 instructions go before GSI.
4159 This function is recursive in that, if you hand it "a_5" as the
4160 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4161 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4163 static tree
4164 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4166 gimple *negatedefstmt = NULL;
4167 tree resultofnegate;
4168 gimple_stmt_iterator gsi;
4169 unsigned int uid;
4171 /* If we are trying to negate a name, defined by an add, negate the
4172 add operands instead. */
4173 if (TREE_CODE (tonegate) == SSA_NAME)
4174 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4175 if (TREE_CODE (tonegate) == SSA_NAME
4176 && is_gimple_assign (negatedefstmt)
4177 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4178 && has_single_use (gimple_assign_lhs (negatedefstmt))
4179 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4181 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4182 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4183 tree lhs = gimple_assign_lhs (negatedefstmt);
4184 gimple *g;
4186 gsi = gsi_for_stmt (negatedefstmt);
4187 rhs1 = negate_value (rhs1, &gsi);
4189 gsi = gsi_for_stmt (negatedefstmt);
4190 rhs2 = negate_value (rhs2, &gsi);
4192 gsi = gsi_for_stmt (negatedefstmt);
4193 lhs = make_ssa_name (TREE_TYPE (lhs));
4194 gimple_set_visited (negatedefstmt, true);
4195 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4196 gimple_set_uid (g, gimple_uid (negatedefstmt));
4197 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4198 return lhs;
4201 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4202 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4203 NULL_TREE, true, GSI_SAME_STMT);
4204 gsi = *gsip;
4205 uid = gimple_uid (gsi_stmt (gsi));
4206 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4208 gimple *stmt = gsi_stmt (gsi);
4209 if (gimple_uid (stmt) != 0)
4210 break;
4211 gimple_set_uid (stmt, uid);
4213 return resultofnegate;
4216 /* Return true if we should break up the subtract in STMT into an add
4217 with negate. This is true when the subtract's operands are really
4218 adds, or the subtract itself is used in an add expression. In
4219 either case, breaking up the subtract into an add with negate
4220 exposes the adds to reassociation. */
4222 static bool
4223 should_break_up_subtract (gimple *stmt)
4225 tree lhs = gimple_assign_lhs (stmt);
4226 tree binlhs = gimple_assign_rhs1 (stmt);
4227 tree binrhs = gimple_assign_rhs2 (stmt);
4228 gimple *immusestmt;
4229 struct loop *loop = loop_containing_stmt (stmt);
4231 if (TREE_CODE (binlhs) == SSA_NAME
4232 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4233 return true;
4235 if (TREE_CODE (binrhs) == SSA_NAME
4236 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4237 return true;
4239 if (TREE_CODE (lhs) == SSA_NAME
4240 && (immusestmt = get_single_immediate_use (lhs))
4241 && is_gimple_assign (immusestmt)
4242 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4243 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4244 return true;
4245 return false;
4248 /* Transform STMT from A - B into A + -B. */
4250 static void
4251 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4253 tree rhs1 = gimple_assign_rhs1 (stmt);
4254 tree rhs2 = gimple_assign_rhs2 (stmt);
4256 if (dump_file && (dump_flags & TDF_DETAILS))
4258 fprintf (dump_file, "Breaking up subtract ");
4259 print_gimple_stmt (dump_file, stmt, 0, 0);
4262 rhs2 = negate_value (rhs2, gsip);
4263 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4264 update_stmt (stmt);
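/* For example (illustration only), a statement sequence such as
     t = a - b;  u = t + c;
   becomes
     n = -b;  t = a + n;  u = t + c;
   exposing the whole computation as a single addition chain for the
   linearizer.  */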
4267 /* Determine whether STMT is a builtin call that raises an SSA name
4268 to an integer power and has only one use. If so, and this is early
4269 reassociation and unsafe math optimizations are permitted, place
4270 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4271 If any of these conditions does not hold, return FALSE. */
4273 static bool
4274 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4276 tree arg1;
4277 REAL_VALUE_TYPE c, cint;
4279 switch (gimple_call_combined_fn (stmt))
4281 CASE_CFN_POW:
4282 if (flag_errno_math)
4283 return false;
4285 *base = gimple_call_arg (stmt, 0);
4286 arg1 = gimple_call_arg (stmt, 1);
4288 if (TREE_CODE (arg1) != REAL_CST)
4289 return false;
4291 c = TREE_REAL_CST (arg1);
4293 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4294 return false;
4296 *exponent = real_to_integer (&c);
4297 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4298 if (!real_identical (&c, &cint))
4299 return false;
4301 break;
4303 CASE_CFN_POWI:
4304 *base = gimple_call_arg (stmt, 0);
4305 arg1 = gimple_call_arg (stmt, 1);
4307 if (!tree_fits_shwi_p (arg1))
4308 return false;
4310 *exponent = tree_to_shwi (arg1);
4311 break;
4313 default:
4314 return false;
4317 /* Expanding negative exponents is generally unproductive, so we don't
4318 complicate matters with those. Exponents of zero and one should
4319 have been handled by expression folding. */
4320 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4321 return false;
4323 return true;
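/* Examples (illustrative): pow (x, 4.0) yields *BASE = x, *EXPONENT = 4
   (provided errno-setting math is disabled), and __builtin_powi (x, 3)
   yields *BASE = x, *EXPONENT = 3.  pow (x, 0.5) is rejected because the
   exponent is not an exact integer, as is any exponent below 2.  */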
4326 /* Try to derive and add operand entry for OP to *OPS. Return false if
4327 unsuccessful. */
4329 static bool
4330 try_special_add_to_ops (vec<operand_entry *> *ops,
4331 enum tree_code code,
4332 tree op, gimple* def_stmt)
4334 tree base = NULL_TREE;
4335 HOST_WIDE_INT exponent = 0;
4337 if (TREE_CODE (op) != SSA_NAME
4338 || ! has_single_use (op))
4339 return false;
4341 if (code == MULT_EXPR
4342 && reassoc_insert_powi_p
4343 && flag_unsafe_math_optimizations
4344 && is_gimple_call (def_stmt)
4345 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4347 add_repeat_to_ops_vec (ops, base, exponent);
4348 gimple_set_visited (def_stmt, true);
4349 return true;
4351 else if (code == MULT_EXPR
4352 && is_gimple_assign (def_stmt)
4353 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4354 && !HONOR_SNANS (TREE_TYPE (op))
4355 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4356 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4358 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4359 tree cst = build_minus_one_cst (TREE_TYPE (op));
4360 add_to_ops_vec (ops, rhs1);
4361 add_to_ops_vec (ops, cst);
4362 gimple_set_visited (def_stmt, true);
4363 return true;
4366 return false;
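/* For instance (illustrative), when linearizing a MULT_EXPR chain a
   single-use operand defined by t = -x is entered into *OPS as the pair
   { x, -1 }, and a single-use pow (x, 3.0) operand is recorded as a
   repeated factor x with count 3 rather than as an opaque operand.  */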
4369 /* Recursively linearize a binary expression that is the RHS of STMT.
4370 Place the operands of the expression tree in the vector named OPS. */
4372 static void
4373 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4374 bool is_associative, bool set_visited)
4376 tree binlhs = gimple_assign_rhs1 (stmt);
4377 tree binrhs = gimple_assign_rhs2 (stmt);
4378 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4379 bool binlhsisreassoc = false;
4380 bool binrhsisreassoc = false;
4381 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4382 struct loop *loop = loop_containing_stmt (stmt);
4384 if (set_visited)
4385 gimple_set_visited (stmt, true);
4387 if (TREE_CODE (binlhs) == SSA_NAME)
4389 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4390 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4391 && !stmt_could_throw_p (binlhsdef));
4394 if (TREE_CODE (binrhs) == SSA_NAME)
4396 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4397 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4398 && !stmt_could_throw_p (binrhsdef));
4401 /* If the LHS is not reassociable, but the RHS is, we need to swap
4402 them. If neither is reassociable, there is nothing we can do, so
4403 just put them in the ops vector. If the LHS is reassociable,
4404 linearize it. If both are reassociable, then linearize the RHS
4405 and the LHS. */
4407 if (!binlhsisreassoc)
4409 /* If this is not an associative operation, like division, give up. */
4410 if (!is_associative)
4412 add_to_ops_vec (ops, binrhs);
4413 return;
4416 if (!binrhsisreassoc)
4418 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4419 add_to_ops_vec (ops, binrhs);
4421 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4422 add_to_ops_vec (ops, binlhs);
4424 return;
4427 if (dump_file && (dump_flags & TDF_DETAILS))
4429 fprintf (dump_file, "swapping operands of ");
4430 print_gimple_stmt (dump_file, stmt, 0, 0);
4433 swap_ssa_operands (stmt,
4434 gimple_assign_rhs1_ptr (stmt),
4435 gimple_assign_rhs2_ptr (stmt));
4436 update_stmt (stmt);
4438 if (dump_file && (dump_flags & TDF_DETAILS))
4440 fprintf (dump_file, " is now ");
4441 print_gimple_stmt (dump_file, stmt, 0, 0);
4444 /* We want to make it so the lhs is always the reassociative op,
4445 so swap. */
4446 std::swap (binlhs, binrhs);
4448 else if (binrhsisreassoc)
4450 linearize_expr (stmt);
4451 binlhs = gimple_assign_rhs1 (stmt);
4452 binrhs = gimple_assign_rhs2 (stmt);
4455 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4456 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4457 rhscode, loop));
4458 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4459 is_associative, set_visited);
4461 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4462 add_to_ops_vec (ops, binrhs);
4465 /* Repropagate the negates back into subtracts, since no other pass
4466 currently does it. */
4468 static void
4469 repropagate_negates (void)
4471 unsigned int i = 0;
4472 tree negate;
4474 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4476 gimple *user = get_single_immediate_use (negate);
4478 if (!user || !is_gimple_assign (user))
4479 continue;
4481 /* The negate operand can be either operand of a PLUS_EXPR
4482 (it can be the LHS if the RHS is a constant for example).
4484 Force the negate operand to the RHS of the PLUS_EXPR, then
4485 transform the PLUS_EXPR into a MINUS_EXPR. */
4486 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4488 /* If the negated operand appears on the LHS of the
4489 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4490 to force the negated operand to the RHS of the PLUS_EXPR. */
4491 if (gimple_assign_rhs1 (user) == negate)
4493 swap_ssa_operands (user,
4494 gimple_assign_rhs1_ptr (user),
4495 gimple_assign_rhs2_ptr (user));
4498 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4499 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4500 if (gimple_assign_rhs2 (user) == negate)
4502 tree rhs1 = gimple_assign_rhs1 (user);
4503 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4504 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4505 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4506 update_stmt (user);
4509 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4511 if (gimple_assign_rhs1 (user) == negate)
4513 /* We have
4514 x = -a
4515 y = x - b
4516 which we transform into
4517 x = a + b
4518 y = -x .
4519 This pushes down the negate which we possibly can merge
4520 into some other operation, hence insert it into the
4521 plus_negates vector. */
4522 gimple *feed = SSA_NAME_DEF_STMT (negate);
4523 tree a = gimple_assign_rhs1 (feed);
4524 tree b = gimple_assign_rhs2 (user);
4525 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4526 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4527 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4528 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4529 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4530 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4531 user = gsi_stmt (gsi2);
4532 update_stmt (user);
4533 reassoc_remove_stmt (&gsi);
4534 release_defs (feed);
4535 plus_negates.safe_push (gimple_assign_lhs (user));
4537 else
4539 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4540 rid of one operation. */
4541 gimple *feed = SSA_NAME_DEF_STMT (negate);
4542 tree a = gimple_assign_rhs1 (feed);
4543 tree rhs1 = gimple_assign_rhs1 (user);
4544 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4545 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4546 update_stmt (gsi_stmt (gsi));
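/* The PLUS_EXPR case above performs, for example (illustration only),
     x = -a;  y = b + x;   -->   y = b - a;
   leaving the now-unused negate for DCE to remove.  */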
4552 /* Returns true if OP is of a type for which we can do reassociation.
4553 That is for integral or non-saturating fixed-point types, and for
4554 floating-point types when associative-math is enabled. */
4556 static bool
4557 can_reassociate_p (tree op)
4559 tree type = TREE_TYPE (op);
4560 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4561 || NON_SAT_FIXED_POINT_TYPE_P (type)
4562 || (flag_associative_math && FLOAT_TYPE_P (type)))
4563 return true;
4564 return false;
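/* For example (illustrative): unsigned int, and signed int with -fwrapv,
   qualify because overflow wraps; plain signed int with undefined
   overflow does not; float and double qualify only under
   -fassociative-math.  */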
4567 /* Break up subtract operations in block BB.
4569 We do this top down because we don't know whether the subtract is
4570 part of a possible chain of reassociation except at the top.
4572 IE given
4573 d = f + g
4574 c = a + e
4575 b = c - d
4576 q = b - r
4577 k = t - q
4579 we want to break up k = t - q, but we won't until we've transformed q
4580 = b - r, which won't be broken up until we transform b = c - d.
4582 En passant, clear the GIMPLE visited flag on every statement
4583 and set UIDs within each basic block. */
4585 static void
4586 break_up_subtract_bb (basic_block bb)
4588 gimple_stmt_iterator gsi;
4589 basic_block son;
4590 unsigned int uid = 1;
4592 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4594 gimple *stmt = gsi_stmt (gsi);
4595 gimple_set_visited (stmt, false);
4596 gimple_set_uid (stmt, uid++);
4598 if (!is_gimple_assign (stmt)
4599 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4600 continue;
4602 /* Look for simple gimple subtract operations. */
4603 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4605 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4606 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4607 continue;
4609 /* Check for a subtract used only in an addition. If this
4610 is the case, transform it into an add of a negate for better
4611 reassociation. IE transform C = A - B into C = A + -B if C
4612 is only used in an addition. */
4613 if (should_break_up_subtract (stmt))
4614 break_up_subtract (stmt, &gsi);
4616 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4617 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4618 plus_negates.safe_push (gimple_assign_lhs (stmt));
4620 for (son = first_dom_son (CDI_DOMINATORS, bb);
4621 son;
4622 son = next_dom_son (CDI_DOMINATORS, son))
4623 break_up_subtract_bb (son);
4626 /* Used for repeated factor analysis. */
4627 struct repeat_factor
4629 /* An SSA name that occurs in a multiply chain. */
4630 tree factor;
4632 /* Cached rank of the factor. */
4633 unsigned rank;
4635 /* Number of occurrences of the factor in the chain. */
4636 HOST_WIDE_INT count;
4638 /* An SSA name representing the product of this factor and
4639 all factors appearing later in the repeated factor vector. */
4640 tree repr;
4644 static vec<repeat_factor> repeat_factor_vec;
4646 /* Used for sorting the repeat factor vector. Sort primarily by
4647 ascending occurrence count, secondarily by descending rank. */
4649 static int
4650 compare_repeat_factors (const void *x1, const void *x2)
4652 const repeat_factor *rf1 = (const repeat_factor *) x1;
4653 const repeat_factor *rf2 = (const repeat_factor *) x2;
4655 if (rf1->count != rf2->count)
4656 return rf1->count - rf2->count;
4658 return rf2->rank - rf1->rank;
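/* E.g. (illustration only), factors with (count, rank) of (4, 7), (2, 9)
   and (2, 3) sort as (2, 9), (2, 3), (4, 7): lowest occurrence count
   first, and for equal counts the higher rank first.  */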
4661 /* Look for repeated operands in OPS in the multiply tree rooted at
4662 STMT. Replace them with an optimal sequence of multiplies and powi
4663 builtin calls, and remove the used operands from OPS. Return an
4664 SSA name representing the value of the replacement sequence. */
4666 static tree
4667 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4669 unsigned i, j, vec_len;
4670 int ii;
4671 operand_entry *oe;
4672 repeat_factor *rf1, *rf2;
4673 repeat_factor rfnew;
4674 tree result = NULL_TREE;
4675 tree target_ssa, iter_result;
4676 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4677 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4678 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4679 gimple *mul_stmt, *pow_stmt;
4681 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4682 target. */
4683 if (!powi_fndecl)
4684 return NULL_TREE;
4686 /* Allocate the repeated factor vector. */
4687 repeat_factor_vec.create (10);
4689 /* Scan the OPS vector for all SSA names in the product and build
4690 up a vector of occurrence counts for each factor. */
4691 FOR_EACH_VEC_ELT (*ops, i, oe)
4693 if (TREE_CODE (oe->op) == SSA_NAME)
4695 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4697 if (rf1->factor == oe->op)
4699 rf1->count += oe->count;
4700 break;
4704 if (j >= repeat_factor_vec.length ())
4706 rfnew.factor = oe->op;
4707 rfnew.rank = oe->rank;
4708 rfnew.count = oe->count;
4709 rfnew.repr = NULL_TREE;
4710 repeat_factor_vec.safe_push (rfnew);
4715 /* Sort the repeated factor vector by (a) increasing occurrence count,
4716 and (b) decreasing rank. */
4717 repeat_factor_vec.qsort (compare_repeat_factors);
4719 /* It is generally best to combine as many base factors as possible
4720 into a product before applying __builtin_powi to the result.
4721 However, the sort order chosen for the repeated factor vector
4722 allows us to cache partial results for the product of the base
4723 factors for subsequent use. When we already have a cached partial
4724 result from a previous iteration, it is best to make use of it
4725 before looking for another __builtin_pow opportunity.
4727 As an example, consider x * x * y * y * y * z * z * z * z.
4728 We want to first compose the product x * y * z, raise it to the
4729 second power, then multiply this by y * z, and finally multiply
4730 by z. This can be done in 5 multiplies provided we cache y * z
4731 for use in both expressions:
4733 t1 = y * z
4734 t2 = t1 * x
4735 t3 = t2 * t2
4736 t4 = t1 * t3
4737 result = t4 * z
4739 If we instead ignored the cached y * z and first multiplied by
4740 the __builtin_pow opportunity z * z, we would get the inferior:
4742 t1 = y * z
4743 t2 = t1 * x
4744 t3 = t2 * t2
4745 t4 = z * z
4746 t5 = t3 * t4
4747 result = t5 * y */
4749 vec_len = repeat_factor_vec.length ();
4751 /* Repeatedly look for opportunities to create a builtin_powi call. */
4752 while (true)
4754 HOST_WIDE_INT power;
4756 /* First look for the largest cached product of factors from
4757 preceding iterations. If found, create a builtin_powi for
4758 it if the minimum occurrence count for its factors is at
4759 least 2, or just use this cached product as our next
4760 multiplicand if the minimum occurrence count is 1. */
4761 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4763 if (rf1->repr && rf1->count > 0)
4764 break;
4767 if (j < vec_len)
4769 power = rf1->count;
4771 if (power == 1)
4773 iter_result = rf1->repr;
4775 if (dump_file && (dump_flags & TDF_DETAILS))
4777 unsigned elt;
4778 repeat_factor *rf;
4779 fputs ("Multiplying by cached product ", dump_file);
4780 for (elt = j; elt < vec_len; elt++)
4782 rf = &repeat_factor_vec[elt];
4783 print_generic_expr (dump_file, rf->factor, 0);
4784 if (elt < vec_len - 1)
4785 fputs (" * ", dump_file);
4787 fputs ("\n", dump_file);
4790 else
4792 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4793 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4794 build_int_cst (integer_type_node,
4795 power));
4796 gimple_call_set_lhs (pow_stmt, iter_result);
4797 gimple_set_location (pow_stmt, gimple_location (stmt));
4798 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4799 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4801 if (dump_file && (dump_flags & TDF_DETAILS))
4803 unsigned elt;
4804 repeat_factor *rf;
4805 fputs ("Building __builtin_pow call for cached product (",
4806 dump_file);
4807 for (elt = j; elt < vec_len; elt++)
4809 rf = &repeat_factor_vec[elt];
4810 print_generic_expr (dump_file, rf->factor, 0);
4811 if (elt < vec_len - 1)
4812 fputs (" * ", dump_file);
4814 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4815 power);
4819 else
4821 /* Otherwise, find the first factor in the repeated factor
4822 vector whose occurrence count is at least 2. If no such
4823 factor exists, there are no builtin_powi opportunities
4824 remaining. */
4825 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4827 if (rf1->count >= 2)
4828 break;
4831 if (j >= vec_len)
4832 break;
4834 power = rf1->count;
4836 if (dump_file && (dump_flags & TDF_DETAILS))
4838 unsigned elt;
4839 repeat_factor *rf;
4840 fputs ("Building __builtin_pow call for (", dump_file);
4841 for (elt = j; elt < vec_len; elt++)
4843 rf = &repeat_factor_vec[elt];
4844 print_generic_expr (dump_file, rf->factor, 0);
4845 if (elt < vec_len - 1)
4846 fputs (" * ", dump_file);
4848 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4851 reassociate_stats.pows_created++;
4853 /* Visit each element of the vector in reverse order (so that
4854 high-occurrence elements are visited first, and within the
4855 same occurrence count, lower-ranked elements are visited
4856 first). Form a linear product of all elements in this order
4857 whose occurrence count is at least that of element J.
4858 Record the SSA name representing the product of each element
4859 with all subsequent elements in the vector. */
4860 if (j == vec_len - 1)
4861 rf1->repr = rf1->factor;
4862 else
4864 for (ii = vec_len - 2; ii >= (int)j; ii--)
4866 tree op1, op2;
4868 rf1 = &repeat_factor_vec[ii];
4869 rf2 = &repeat_factor_vec[ii + 1];
4871 /* Init the last factor's representative to be itself. */
4872 if (!rf2->repr)
4873 rf2->repr = rf2->factor;
4875 op1 = rf1->factor;
4876 op2 = rf2->repr;
4878 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4879 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4880 op1, op2);
4881 gimple_set_location (mul_stmt, gimple_location (stmt));
4882 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4883 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4884 rf1->repr = target_ssa;
4886 /* Don't reprocess the multiply we just introduced. */
4887 gimple_set_visited (mul_stmt, true);
4891 /* Form a call to __builtin_powi for the maximum product
4892 just formed, raised to the power obtained earlier. */
4893 rf1 = &repeat_factor_vec[j];
4894 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4895 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4896 build_int_cst (integer_type_node,
4897 power));
4898 gimple_call_set_lhs (pow_stmt, iter_result);
4899 gimple_set_location (pow_stmt, gimple_location (stmt));
4900 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4901 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4904 /* If we previously formed at least one other builtin_powi call,
4905 form the product of this one and those others. */
4906 if (result)
4908 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4909 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4910 result, iter_result);
4911 gimple_set_location (mul_stmt, gimple_location (stmt));
4912 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4913 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4914 gimple_set_visited (mul_stmt, true);
4915 result = new_result;
4917 else
4918 result = iter_result;
4920 /* Decrement the occurrence count of each element in the product
4921 by the count found above, and remove this many copies of each
4922 factor from OPS. */
4923 for (i = j; i < vec_len; i++)
4925 unsigned k = power;
4926 unsigned n;
4928 rf1 = &repeat_factor_vec[i];
4929 rf1->count -= power;
4931 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4933 if (oe->op == rf1->factor)
4935 if (oe->count <= k)
4937 ops->ordered_remove (n);
4938 k -= oe->count;
4940 if (k == 0)
4941 break;
4943 else
4945 oe->count -= k;
4946 break;
4953 /* At this point all elements in the repeated factor vector have a
4954 remaining occurrence count of 0 or 1, and those with a count of 1
4955 don't have cached representatives. Re-sort the ops vector and
4956 clean up. */
4957 ops->qsort (sort_by_operand_rank);
4958 repeat_factor_vec.release ();
4960 /* Return the final product computed herein. Note that there may
4961 still be some elements with single occurrence count left in OPS;
4962 those will be handled by the normal reassociation logic. */
4963 return result;
4966 /* Attempt to optimize
4967 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4968 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
4970 static void
4971 attempt_builtin_copysign (vec<operand_entry *> *ops)
4973 operand_entry *oe;
4974 unsigned int i;
4975 unsigned int length = ops->length ();
4976 tree cst = ops->last ()->op;
4978 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4979 return;
4981 FOR_EACH_VEC_ELT (*ops, i, oe)
4983 if (TREE_CODE (oe->op) == SSA_NAME
4984 && has_single_use (oe->op))
4986 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4987 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4989 tree arg0, arg1;
4990 switch (gimple_call_combined_fn (old_call))
4992 CASE_CFN_COPYSIGN:
4993 arg0 = gimple_call_arg (old_call, 0);
4994 arg1 = gimple_call_arg (old_call, 1);
4995 /* The first argument of copysign must be a constant,
4996 otherwise there's nothing to do. */
4997 if (TREE_CODE (arg0) == REAL_CST)
4999 tree type = TREE_TYPE (arg0);
5000 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5001 /* If we couldn't fold to a single constant, skip it.
5002 That happens e.g. for inexact multiplication when
5003 -frounding-math. */
5004 if (mul == NULL_TREE)
5005 break;
5006 /* Instead of adjusting OLD_CALL, let's build a new
5007 call to not leak the LHS and prevent keeping bogus
5008 debug statements. DCE will clean up the old call. */
5009 gcall *new_call;
5010 if (gimple_call_internal_p (old_call))
5011 new_call = gimple_build_call_internal
5012 (IFN_COPYSIGN, 2, mul, arg1);
5013 else
5014 new_call = gimple_build_call
5015 (gimple_call_fndecl (old_call), 2, mul, arg1);
5016 tree lhs = make_ssa_name (type);
5017 gimple_call_set_lhs (new_call, lhs);
5018 gimple_set_location (new_call,
5019 gimple_location (old_call));
5020 insert_stmt_after (new_call, old_call);
5021 /* We've used the constant, get rid of it. */
5022 ops->pop ();
5023 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5024 /* Handle the CST1 < 0 case by negating the result. */
5025 if (cst1_neg)
5027 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5028 gimple *negate_stmt
5029 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5030 insert_stmt_after (negate_stmt, new_call);
5031 oe->op = negrhs;
5033 else
5034 oe->op = lhs;
5035 if (dump_file && (dump_flags & TDF_DETAILS))
5037 fprintf (dump_file, "Optimizing copysign: ");
5038 print_generic_expr (dump_file, cst, 0);
5039 fprintf (dump_file, " * COPYSIGN (");
5040 print_generic_expr (dump_file, arg0, 0);
5041 fprintf (dump_file, ", ");
5042 print_generic_expr (dump_file, arg1, 0);
5043 fprintf (dump_file, ") into %sCOPYSIGN (",
5044 cst1_neg ? "-" : "");
5045 print_generic_expr (dump_file, mul, 0);
5046 fprintf (dump_file, ", ");
5047 print_generic_expr (dump_file, arg1, 0);
5048 fprintf (dump_file, "\n");
5050 return;
5052 break;
5053 default:
5054 break;
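/* Following the formula in the comment above the function, e.g.
   (illustrative) 2.0 * copysign (3.0, y) becomes copysign (6.0, y), and
   -2.0 * copysign (3.0, y) becomes -copysign (-6.0, y); the constant is
   popped from OPS and the copysign operand entry is redirected to the
   new call's result.  */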
5061 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5063 static void
5064 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5066 tree rhs1;
5068 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, "Transforming ");
5071 print_gimple_stmt (dump_file, stmt, 0, 0);
5074 rhs1 = gimple_assign_rhs1 (stmt);
5075 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5076 update_stmt (stmt);
5077 remove_visited_stmt_chain (rhs1);
5079 if (dump_file && (dump_flags & TDF_DETAILS))
5081 fprintf (dump_file, " into ");
5082 print_gimple_stmt (dump_file, stmt, 0, 0);
5086 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5088 static void
5089 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5090 tree rhs1, tree rhs2)
5092 if (dump_file && (dump_flags & TDF_DETAILS))
5094 fprintf (dump_file, "Transforming ");
5095 print_gimple_stmt (dump_file, stmt, 0, 0);
5098 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5099 update_stmt (gsi_stmt (*gsi));
5100 remove_visited_stmt_chain (rhs1);
5102 if (dump_file && (dump_flags & TDF_DETAILS))
5104 fprintf (dump_file, " into ");
5105 print_gimple_stmt (dump_file, stmt, 0, 0);
5109 /* Reassociate expressions in basic block BB and its post-dominator
5110 children.
5112 Bubble up return status from maybe_optimize_range_tests. */
5114 static bool
5115 reassociate_bb (basic_block bb)
5117 gimple_stmt_iterator gsi;
5118 basic_block son;
5119 gimple *stmt = last_stmt (bb);
5120 bool cfg_cleanup_needed = false;
5122 if (stmt && !gimple_visited_p (stmt))
5123 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5125 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5127 stmt = gsi_stmt (gsi);
5129 if (is_gimple_assign (stmt)
5130 && !stmt_could_throw_p (stmt))
5132 tree lhs, rhs1, rhs2;
5133 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5135 /* If this is not a gimple binary expression, there is
5136 nothing for us to do with it. */
5137 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5138 continue;
5140 /* If this was part of an already processed statement,
5141 we don't need to touch it again. */
5142 if (gimple_visited_p (stmt))
5144 /* This statement might have become dead because of previous
5145 reassociations. */
5146 if (has_zero_uses (gimple_get_lhs (stmt)))
5148 reassoc_remove_stmt (&gsi);
5149 release_defs (stmt);
5150 /* We might end up removing the last stmt above which
5151 places the iterator to the end of the sequence.
5152 Reset it to the last stmt in this case which might
5153 be the end of the sequence as well if we removed
5154 the last statement of the sequence. In which case
5155 we need to bail out. */
5156 if (gsi_end_p (gsi))
5158 gsi = gsi_last_bb (bb);
5159 if (gsi_end_p (gsi))
5160 break;
5163 continue;
5166 lhs = gimple_assign_lhs (stmt);
5167 rhs1 = gimple_assign_rhs1 (stmt);
5168 rhs2 = gimple_assign_rhs2 (stmt);
5170 /* For non-bit or min/max operations we can't associate
5171 all types. Verify that here. */
5172 if (rhs_code != BIT_IOR_EXPR
5173 && rhs_code != BIT_AND_EXPR
5174 && rhs_code != BIT_XOR_EXPR
5175 && rhs_code != MIN_EXPR
5176 && rhs_code != MAX_EXPR
5177 && (!can_reassociate_p (lhs)
5178 || !can_reassociate_p (rhs1)
5179 || !can_reassociate_p (rhs2)))
5180 continue;
5182 if (associative_tree_code (rhs_code))
5184 auto_vec<operand_entry *> ops;
5185 tree powi_result = NULL_TREE;
5186 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5188 /* There may be no immediate uses left by the time we
5189 get here because we may have eliminated them all. */
5190 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5191 continue;
5193 gimple_set_visited (stmt, true);
5194 linearize_expr_tree (&ops, stmt, true, true);
5195 ops.qsort (sort_by_operand_rank);
5196 optimize_ops_list (rhs_code, &ops);
5197 if (undistribute_ops_list (rhs_code, &ops,
5198 loop_containing_stmt (stmt)))
5200 ops.qsort (sort_by_operand_rank);
5201 optimize_ops_list (rhs_code, &ops);
5204 if (rhs_code == PLUS_EXPR
5205 && transform_add_to_multiply (stmt, &ops))
5206 ops.qsort (sort_by_operand_rank);
5208 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5210 if (is_vector)
5211 optimize_vec_cond_expr (rhs_code, &ops);
5212 else
5213 optimize_range_tests (rhs_code, &ops);
5216 if (rhs_code == MULT_EXPR && !is_vector)
5218 attempt_builtin_copysign (&ops);
5220 if (reassoc_insert_powi_p
5221 && flag_unsafe_math_optimizations)
5222 powi_result = attempt_builtin_powi (stmt, &ops);
5225 operand_entry *last;
5226 bool negate_result = false;
5227 if (ops.length () > 1
5228 && rhs_code == MULT_EXPR)
5230 last = ops.last ();
5231 if (((TREE_CODE (last->op) == INTEGER_CST
5232 && integer_minus_onep (last->op))
5233 || real_minus_onep (last->op))
5234 && !HONOR_SNANS (TREE_TYPE (lhs))
5235 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5236 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5238 ops.pop ();
5239 negate_result = true;
5243 /* If the operand vector is now empty, all operands were
5244 consumed by the __builtin_powi optimization. */
5245 if (ops.length () == 0)
5246 transform_stmt_to_copy (&gsi, stmt, powi_result);
5247 else if (ops.length () == 1)
5249 tree last_op = ops.last ()->op;
5251 if (powi_result)
5252 transform_stmt_to_multiply (&gsi, stmt, last_op,
5253 powi_result);
5254 else
5255 transform_stmt_to_copy (&gsi, stmt, last_op);
5257 else
5259 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5260 int ops_num = ops.length ();
5261 int width = get_reassociation_width (ops_num, rhs_code, mode);
5262 tree new_lhs = lhs;
5264 if (dump_file && (dump_flags & TDF_DETAILS))
5265 fprintf (dump_file,
5266 "Width = %d was chosen for reassociation\n", width);
5268 if (width > 1
5269 && ops.length () > 3)
5270 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5271 width, ops);
5272 else
5274 /* When there are three operands left, we want
5275 to make sure the ones that get the double
5276 binary op are chosen wisely. */
5277 int len = ops.length ();
5278 if (len >= 3)
5279 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5281 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5282 powi_result != NULL);
5285 /* If we combined some repeated factors into a
5286 __builtin_powi call, multiply that result by the
5287 reassociated operands. */
5288 if (powi_result)
5290 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5291 tree type = TREE_TYPE (lhs);
5292 tree target_ssa = make_temp_ssa_name (type, NULL,
5293 "reassocpow");
5294 gimple_set_lhs (lhs_stmt, target_ssa);
5295 update_stmt (lhs_stmt);
5296 if (lhs != new_lhs)
5297 target_ssa = new_lhs;
5298 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5299 powi_result, target_ssa);
5300 gimple_set_location (mul_stmt, gimple_location (stmt));
5301 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5302 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5306 if (negate_result)
5308 stmt = SSA_NAME_DEF_STMT (lhs);
5309 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5310 gimple_set_lhs (stmt, tmp);
5311 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5312 tmp);
5313 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5314 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5315 update_stmt (stmt);
5320 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5321 son;
5322 son = next_dom_son (CDI_POST_DOMINATORS, son))
5323 cfg_cleanup_needed |= reassociate_bb (son);
5325 return cfg_cleanup_needed;
5328 /* Add jumps around shifts for range tests turned into bit tests.
5329 For each SSA_NAME VAR we have code like:
5330 VAR = ...; // final stmt of range comparison
5331 // bit test here...;
5332 OTHERVAR = ...; // final stmt of the bit test sequence
5333 RES = VAR | OTHERVAR;
5334 Turn the above into:
5335 VAR = ...;
5336 if (VAR != 0)
5337 goto <l3>;
5338 else
5339 goto <l2>;
5340 <l2>:
5341 // bit test here...;
5342 OTHERVAR = ...;
5343 <l3>:
5344 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5346 static void
5347 branch_fixup (void)
5349 tree var;
5350 unsigned int i;
5352 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5354 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5355 gimple *use_stmt;
5356 use_operand_p use;
5357 bool ok = single_imm_use (var, &use, &use_stmt);
5358 gcc_assert (ok
5359 && is_gimple_assign (use_stmt)
5360 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5361 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5363 basic_block cond_bb = gimple_bb (def_stmt);
5364 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5365 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5367 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5368 gimple *g = gimple_build_cond (NE_EXPR, var,
5369 build_zero_cst (TREE_TYPE (var)),
5370 NULL_TREE, NULL_TREE);
5371 location_t loc = gimple_location (use_stmt);
5372 gimple_set_location (g, loc);
5373 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5375 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5376 etrue->probability = REG_BR_PROB_BASE / 2;
5377 etrue->count = cond_bb->count / 2;
5378 edge efalse = find_edge (cond_bb, then_bb);
5379 efalse->flags = EDGE_FALSE_VALUE;
5380 efalse->probability -= etrue->probability;
5381 efalse->count -= etrue->count;
5382 then_bb->count -= etrue->count;
5384 tree othervar = NULL_TREE;
5385 if (gimple_assign_rhs1 (use_stmt) == var)
5386 othervar = gimple_assign_rhs2 (use_stmt);
5387 else if (gimple_assign_rhs2 (use_stmt) == var)
5388 othervar = gimple_assign_rhs1 (use_stmt);
5389 else
5390 gcc_unreachable ();
5391 tree lhs = gimple_assign_lhs (use_stmt);
5392 gphi *phi = create_phi_node (lhs, merge_bb);
5393 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5394 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5395 gsi = gsi_for_stmt (use_stmt);
5396 gsi_remove (&gsi, true);
5398 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5399 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5401 reassoc_branch_fixups.release ();
5404 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5405 void debug_ops_vector (vec<operand_entry *> ops);
5407 /* Dump the operand entry vector OPS to FILE. */
5409 void
5410 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5412 operand_entry *oe;
5413 unsigned int i;
5415 FOR_EACH_VEC_ELT (ops, i, oe)
5417 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5418 print_generic_expr (file, oe->op, 0);
5419 fprintf (file, "\n");
5423 /* Dump the operand entry vector OPS to STDERR. */
5425 DEBUG_FUNCTION void
5426 debug_ops_vector (vec<operand_entry *> ops)
5428 dump_ops_vector (stderr, ops);
5431 /* Bubble up return status from reassociate_bb. */
5433 static bool
5434 do_reassoc (void)
5436 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5437 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5440 /* Initialize the reassociation pass. */
5442 static void
5443 init_reassoc (void)
5445 int i;
5446 long rank = 2;
5447 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5449 /* Find the loops, so that we can prevent moving calculations in
5450 them. */
5451 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5453 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5455 next_operand_entry_id = 0;
5457 /* Reverse RPO (Reverse Post Order) will give us something where
5458 deeper loops come later. */
5459 pre_and_rev_post_order_compute (NULL, bbs, false);
5460 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5461 operand_rank = new hash_map<tree, long>;
5463 /* Give each default definition a distinct rank. This includes
5464 parameters and the static chain. Walk backwards over all
5465 SSA names so that we get proper rank ordering according
5466 to tree_swap_operands_p. */
5467 for (i = num_ssa_names - 1; i > 0; --i)
5469 tree name = ssa_name (i);
5470 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5471 insert_operand_rank (name, ++rank);
5474 /* Set up rank for each BB */
5475 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5476 bb_rank[bbs[i]] = ++rank << 16;
5478 free (bbs);
5479 calculate_dominance_info (CDI_POST_DOMINATORS);
5480 plus_negates = vNULL;
5483 /* Cleanup after the reassociation pass, and print stats if
5484 requested. */
5486 static void
5487 fini_reassoc (void)
5489 statistics_counter_event (cfun, "Linearized",
5490 reassociate_stats.linearized);
5491 statistics_counter_event (cfun, "Constants eliminated",
5492 reassociate_stats.constants_eliminated);
5493 statistics_counter_event (cfun, "Ops eliminated",
5494 reassociate_stats.ops_eliminated);
5495 statistics_counter_event (cfun, "Statements rewritten",
5496 reassociate_stats.rewritten);
5497 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5498 reassociate_stats.pows_encountered);
5499 statistics_counter_event (cfun, "Built-in powi calls created",
5500 reassociate_stats.pows_created);
5502 delete operand_rank;
5503 operand_entry_pool.release ();
5504 free (bb_rank);
5505 plus_negates.release ();
5506 free_dominance_info (CDI_POST_DOMINATORS);
5507 loop_optimizer_finalize ();
5510 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5511 insertion of __builtin_powi calls.
5513 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5514 optimization of a gimple conditional. Otherwise returns zero. */
5516 static unsigned int
5517 execute_reassoc (bool insert_powi_p)
5519 reassoc_insert_powi_p = insert_powi_p;
5521 init_reassoc ();
5523 bool cfg_cleanup_needed;
5524 cfg_cleanup_needed = do_reassoc ();
5525 repropagate_negates ();
5526 branch_fixup ();
5528 fini_reassoc ();
5529 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5532 namespace {
5534 const pass_data pass_data_reassoc =
5536 GIMPLE_PASS, /* type */
5537 "reassoc", /* name */
5538 OPTGROUP_NONE, /* optinfo_flags */
5539 TV_TREE_REASSOC, /* tv_id */
5540 ( PROP_cfg | PROP_ssa ), /* properties_required */
5541 0, /* properties_provided */
5542 0, /* properties_destroyed */
5543 0, /* todo_flags_start */
5544 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5547 class pass_reassoc : public gimple_opt_pass
5549 public:
5550 pass_reassoc (gcc::context *ctxt)
5551 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5554 /* opt_pass methods: */
5555 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5556 void set_pass_param (unsigned int n, bool param)
5558 gcc_assert (n == 0);
5559 insert_powi_p = param;
5561 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5562 virtual unsigned int execute (function *)
5563 { return execute_reassoc (insert_powi_p); }
5565 private:
5566 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5567 point 3a in the pass header comment. */
5568 bool insert_powi_p;
5569 }; // class pass_reassoc
5571 } // anon namespace
5573 gimple_opt_pass *
5574 make_pass_reassoc (gcc::context *ctxt)
5576 return new pass_reassoc (ctxt);