gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 	   reassociating have 3 or fewer ops.  If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
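/* A rough illustrative sketch of the steps above (GIMPLE shown
   schematically, not verbatim):

     step 1:  w = x - y                becomes  w = x + (-y)
     step 2:  t = (a + b) + (c + d)    becomes  t = ((a + b) + c) + d,
              with a, b, c and d collected into an operand vector
     step 3:  pairs such as a + (-a) or a & a are dropped from the vector
     step 4:  the surviving operands are re-emitted in rank order
     step 5:  leftover negates are folded back, e.g. t = -y; z = t + x
              becomes z = x - y.  */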
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217    all basic blocks, the CFG should be adjusted - the basic block is
218    split right after that SSA_NAME's definition statement and before
219    its only use, which must be a bit ior.  */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
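  /* gsi_remove may have inserted debug stmts (which start out with uid 0)
     where the removed statement used to be; give them the removed
     statement's uid so that uid-based dominance checks stay consistent.  */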
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384   /* SSA_NAME's have the rank of the expression they are the result
385      of.
386      For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446       /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE (1 << 3)
473 #define FLOAT_CONST_TYPE (1 << 2)
474 #define OTHER_CONST_TYPE (1 << 1)
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
498      to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
516       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
517 	 versions of removed SSA_NAMEs, so if possible prefer to sort
518 	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519 	 See PR60418.  */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
590 /* Return true if STMT is a reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629    operand of that unary operation.  Otherwise, return NULL.  */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
698   /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
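  /* E.g. a & b & b keeps a single copy of b (likewise for |, min and max),
     while a ^ b ^ b drops both copies of b, pushing a zero constant when
     the two copies were the only operands left.  */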
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
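/* E.g. in a + b + (-a) the pair a, -a cancels and is replaced by 0, while
   in a + b + ~a the pair a, ~a folds to -1; the 0 or -1 is added back to
   OPS as a constant operand.  */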
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
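/* E.g. in a & b & ~a the pair a, ~a forces the whole chain to 0, and in
   a | b | ~a it forces the whole chain to -1 (all ones).  */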
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
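/* E.g. x & 0 reduces to 0 and x * 1 to x: an absorbing constant (0 for &
   and *, -1 for |) replaces the whole chain, while an identity constant
   (-1 for &, 0 for |, ^ and +, 1 for *) is simply dropped.  */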
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
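/* E.g. for __builtin_pow (x, 3.0) the exponent is rewritten to 2.0 and 2
   is returned; for __builtin_powi (x, 5) the call becomes powi (x, 4) and
   4 is returned.  */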
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182    with operand OP and code OPCODE, removing that from the chain.  *DEF
1183 is updated if there is only one operand but no operation left. */
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1204 if (gimple_assign_rhs1 (stmt) == op)
1206 tree cst = build_minus_one_cst (TREE_TYPE (op));
1207 propagate_op_to_single_use (cst, stmt, def);
1208 return;
1210 else if (integer_minus_onep (op)
1211 || real_minus_onep (op))
1213 gimple_assign_set_rhs_code
1214 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1215 return;
1220 name = gimple_assign_rhs1 (stmt);
1222 /* If this is the operation we look for and one of the operands
1223 is ours simply propagate the other operand into the stmts
1224 single use. */
1225 if (gimple_assign_rhs_code (stmt) == opcode
1226 && (name == op
1227 || gimple_assign_rhs2 (stmt) == op))
1229 if (name == op)
1230 name = gimple_assign_rhs2 (stmt);
1231 propagate_op_to_single_use (name, stmt, def);
1232 return;
1235 /* We might have a multiply of two __builtin_pow* calls, and
1236 the operand might be hiding in the rightmost one. Likewise
1237 this can happen for a negate. */
1238 if (opcode == MULT_EXPR
1239 && gimple_assign_rhs_code (stmt) == opcode
1240 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1241 && has_single_use (gimple_assign_rhs2 (stmt)))
1243 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1244 if (stmt_is_power_of_op (stmt2, op))
1246 if (decrement_power (stmt2) == 1)
1247 propagate_op_to_single_use (op, stmt2, def);
1248 return;
1250 else if (is_gimple_assign (stmt2)
1251 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1253 if (gimple_assign_rhs1 (stmt2) == op)
1255 tree cst = build_minus_one_cst (TREE_TYPE (op));
1256 propagate_op_to_single_use (cst, stmt2, def);
1257 return;
1259 else if (integer_minus_onep (op)
1260 || real_minus_onep (op))
1262 gimple_assign_set_rhs_code
1263 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1264 return;
1269 /* Continue walking the chain. */
1270 gcc_assert (name != op
1271 && TREE_CODE (name) == SSA_NAME);
1272 stmt = SSA_NAME_DEF_STMT (name);
1274 while (1);
1277 /* Returns true if statement S1 dominates statement S2. Like
1278 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1280 static bool
1281 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1283 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1285   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a default-def (D)
1286      SSA_NAME.  Assume it lives at the beginning of the function and
1287 thus dominates everything. */
1288 if (!bb1 || s1 == s2)
1289 return true;
1291 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1292 if (!bb2)
1293 return false;
1295 if (bb1 == bb2)
1297 /* PHIs in the same basic block are assumed to be
1298 executed all in parallel, if only one stmt is a PHI,
1299 it dominates the other stmt in the same basic block. */
1300 if (gimple_code (s1) == GIMPLE_PHI)
1301 return true;
1303 if (gimple_code (s2) == GIMPLE_PHI)
1304 return false;
1306 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1308 if (gimple_uid (s1) < gimple_uid (s2))
1309 return true;
1311 if (gimple_uid (s1) > gimple_uid (s2))
1312 return false;
1314 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1315 unsigned int uid = gimple_uid (s1);
1316 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1318 gimple *s = gsi_stmt (gsi);
1319 if (gimple_uid (s) != uid)
1320 break;
1321 if (s == s2)
1322 return true;
1325 return false;
1328 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1331 /* Insert STMT after INSERT_POINT. */
1333 static void
1334 insert_stmt_after (gimple *stmt, gimple *insert_point)
1336 gimple_stmt_iterator gsi;
1337 basic_block bb;
1339 if (gimple_code (insert_point) == GIMPLE_PHI)
1340 bb = gimple_bb (insert_point);
1341 else if (!stmt_ends_bb_p (insert_point))
1343 gsi = gsi_for_stmt (insert_point);
1344 gimple_set_uid (stmt, gimple_uid (insert_point));
1345 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1346 return;
1348 else
1349 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1350 thus if it must end a basic block, it should be a call that can
1351 throw, or some assignment that can throw. If it throws, the LHS
1352 of it will not be initialized though, so only valid places using
1353 the SSA_NAME should be dominated by the fallthru edge. */
1354 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1355 gsi = gsi_after_labels (bb);
1356 if (gsi_end_p (gsi))
1358 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1359 gimple_set_uid (stmt,
1360 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1362 else
1363 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1364 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1367 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1368    SSA name of type TYPE for the result.  Places the statement after
1369    the definition of either OP1 or OP2.  Returns the new statement.  */
1371 static gimple *
1372 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1374 gimple *op1def = NULL, *op2def = NULL;
1375 gimple_stmt_iterator gsi;
1376 tree op;
1377 gassign *sum;
1379 /* Create the addition statement. */
1380 op = make_ssa_name (type);
1381 sum = gimple_build_assign (op, opcode, op1, op2);
1383 /* Find an insertion place and insert. */
1384 if (TREE_CODE (op1) == SSA_NAME)
1385 op1def = SSA_NAME_DEF_STMT (op1);
1386 if (TREE_CODE (op2) == SSA_NAME)
1387 op2def = SSA_NAME_DEF_STMT (op2);
1388 if ((!op1def || gimple_nop_p (op1def))
1389 && (!op2def || gimple_nop_p (op2def)))
1391 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1392 if (gsi_end_p (gsi))
1394 gimple_stmt_iterator gsi2
1395 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1396 gimple_set_uid (sum,
1397 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1399 else
1400 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1401 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1403 else
1405 gimple *insert_point;
1406 if ((!op1def || gimple_nop_p (op1def))
1407 || (op2def && !gimple_nop_p (op2def)
1408 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1409 insert_point = op2def;
1410 else
1411 insert_point = op1def;
1412 insert_stmt_after (sum, insert_point);
1414 update_stmt (sum);
1416 return sum;
1419 /* Perform un-distribution of divisions and multiplications.
1420 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1421 to (A + B) / X for real X.
1423 The algorithm is organized as follows.
1425 - First we walk the addition chain *OPS looking for summands that
1426 are defined by a multiplication or a real division. This results
1427 in the candidates bitmap with relevant indices into *OPS.
1429 - Second we build the chains of multiplications or divisions for
1430 these candidates, counting the number of occurrences of (operand, code)
1431 pairs in all of the candidates chains.
1433 - Third we sort the (operand, code) pairs by number of occurrence and
1434 process them starting with the pair with the most uses.
1436 * For each such pair we walk the candidates again to build a
1437 second candidate bitmap noting all multiplication/division chains
1438 that have at least one occurrence of (operand, code).
1440 * We build an alternate addition chain only covering these
1441 candidates with one (operand, code) operation removed from their
1442 multiplication/division chain.
1444 * The first candidate gets replaced by the alternate addition chain
1445 multiplied/divided by the operand.
1447 * All candidate chains get disabled for further processing and
1448 processing of (operand, code) pairs continues.
1450 The alternate addition chains built are re-processed by the main
1451 reassociation algorithm which allows optimizing a * x * y + b * y * x
1452    to (a + b) * x * y in one invocation of the reassociation pass.  */
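/* E.g. for the chain a*x + b*x + c the pair (x, MULT_EXPR) occurs twice;
   x is stripped from both candidate chains, the stripped operands are
   summed, and the first candidate is replaced by (a + b) * x while the
   second becomes a zero constant.  */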
1454 static bool
1455 undistribute_ops_list (enum tree_code opcode,
1456 vec<operand_entry *> *ops, struct loop *loop)
1458 unsigned int length = ops->length ();
1459 operand_entry *oe1;
1460 unsigned i, j;
1461 sbitmap candidates, candidates2;
1462 unsigned nr_candidates, nr_candidates2;
1463 sbitmap_iterator sbi0;
1464 vec<operand_entry *> *subops;
1465 bool changed = false;
1466 int next_oecount_id = 0;
1468 if (length <= 1
1469 || opcode != PLUS_EXPR)
1470 return false;
1472 /* Build a list of candidates to process. */
1473 candidates = sbitmap_alloc (length);
1474 bitmap_clear (candidates);
1475 nr_candidates = 0;
1476 FOR_EACH_VEC_ELT (*ops, i, oe1)
1478 enum tree_code dcode;
1479 gimple *oe1def;
1481 if (TREE_CODE (oe1->op) != SSA_NAME)
1482 continue;
1483 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1484 if (!is_gimple_assign (oe1def))
1485 continue;
1486 dcode = gimple_assign_rhs_code (oe1def);
1487 if ((dcode != MULT_EXPR
1488 && dcode != RDIV_EXPR)
1489 || !is_reassociable_op (oe1def, dcode, loop))
1490 continue;
1492 bitmap_set_bit (candidates, i);
1493 nr_candidates++;
1496 if (nr_candidates < 2)
1498 sbitmap_free (candidates);
1499 return false;
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1504 fprintf (dump_file, "searching for un-distribute opportunities ");
1505 print_generic_expr (dump_file,
1506 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1507 fprintf (dump_file, " %d\n", nr_candidates);
1510 /* Build linearized sub-operand lists and the counting table. */
1511 cvec.create (0);
1513 hash_table<oecount_hasher> ctable (15);
1515 /* ??? Macro arguments cannot have multi-argument template types in
1516      them.  This typedef is needed to work around that limitation.  */
1517 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1518 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1519 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1521 gimple *oedef;
1522 enum tree_code oecode;
1523 unsigned j;
1525 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1526 oecode = gimple_assign_rhs_code (oedef);
1527 linearize_expr_tree (&subops[i], oedef,
1528 associative_tree_code (oecode), false);
1530 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1532 oecount c;
1533 int *slot;
1534 int idx;
1535 c.oecode = oecode;
1536 c.cnt = 1;
1537 c.id = next_oecount_id++;
1538 c.op = oe1->op;
1539 cvec.safe_push (c);
1540 idx = cvec.length () + 41;
1541 slot = ctable.find_slot (idx, INSERT);
1542 if (!*slot)
1544 *slot = idx;
1546 else
1548 cvec.pop ();
1549 cvec[*slot - 42].cnt++;
1554 /* Sort the counting table. */
1555 cvec.qsort (oecount_cmp);
1557 if (dump_file && (dump_flags & TDF_DETAILS))
1559 oecount *c;
1560 fprintf (dump_file, "Candidates:\n");
1561 FOR_EACH_VEC_ELT (cvec, j, c)
1563 fprintf (dump_file, " %u %s: ", c->cnt,
1564 c->oecode == MULT_EXPR
1565 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1566 print_generic_expr (dump_file, c->op, 0);
1567 fprintf (dump_file, "\n");
1571 /* Process the (operand, code) pairs in order of most occurrence. */
1572 candidates2 = sbitmap_alloc (length);
1573 while (!cvec.is_empty ())
1575 oecount *c = &cvec.last ();
1576 if (c->cnt < 2)
1577 break;
1579 /* Now collect the operands in the outer chain that contain
1580 the common operand in their inner chain. */
1581 bitmap_clear (candidates2);
1582 nr_candidates2 = 0;
1583 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1585 gimple *oedef;
1586 enum tree_code oecode;
1587 unsigned j;
1588 tree op = (*ops)[i]->op;
1590 /* If we undistributed in this chain already this may be
1591 a constant. */
1592 if (TREE_CODE (op) != SSA_NAME)
1593 continue;
1595 oedef = SSA_NAME_DEF_STMT (op);
1596 oecode = gimple_assign_rhs_code (oedef);
1597 if (oecode != c->oecode)
1598 continue;
1600 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1602 if (oe1->op == c->op)
1604 bitmap_set_bit (candidates2, i);
1605 ++nr_candidates2;
1606 break;
1611 if (nr_candidates2 >= 2)
1613 operand_entry *oe1, *oe2;
1614 gimple *prod;
1615 int first = bitmap_first_set_bit (candidates2);
1617 /* Build the new addition chain. */
1618 oe1 = (*ops)[first];
1619 if (dump_file && (dump_flags & TDF_DETAILS))
1621 fprintf (dump_file, "Building (");
1622 print_generic_expr (dump_file, oe1->op, 0);
1624 zero_one_operation (&oe1->op, c->oecode, c->op);
1625 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1627 gimple *sum;
1628 oe2 = (*ops)[i];
1629 if (dump_file && (dump_flags & TDF_DETAILS))
1631 fprintf (dump_file, " + ");
1632 print_generic_expr (dump_file, oe2->op, 0);
1634 zero_one_operation (&oe2->op, c->oecode, c->op);
1635 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1636 oe1->op, oe2->op, opcode);
1637 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1638 oe2->rank = 0;
1639 oe1->op = gimple_get_lhs (sum);
1642 /* Apply the multiplication/division. */
1643 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1644 oe1->op, c->op, c->oecode);
1645 if (dump_file && (dump_flags & TDF_DETAILS))
1647 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1648 print_generic_expr (dump_file, c->op, 0);
1649 fprintf (dump_file, "\n");
1652 /* Record it in the addition chain and disable further
1653 undistribution with this op. */
1654 oe1->op = gimple_assign_lhs (prod);
1655 oe1->rank = get_rank (oe1->op);
1656 subops[first].release ();
1658 changed = true;
1661 cvec.pop ();
1664 for (i = 0; i < ops->length (); ++i)
1665 subops[i].release ();
1666 free (subops);
1667 cvec.release ();
1668 sbitmap_free (candidates);
1669 sbitmap_free (candidates2);
1671 return changed;
1674 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1675 expression, examine the other OPS to see if any of them are comparisons
1676 of the same values, which we may be able to combine or eliminate.
1677 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1679 static bool
1680 eliminate_redundant_comparison (enum tree_code opcode,
1681 vec<operand_entry *> *ops,
1682 unsigned int currindex,
1683 operand_entry *curr)
1685 tree op1, op2;
1686 enum tree_code lcode, rcode;
1687 gimple *def1, *def2;
1688 int i;
1689 operand_entry *oe;
1691 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1692 return false;
1694 /* Check that CURR is a comparison. */
1695 if (TREE_CODE (curr->op) != SSA_NAME)
1696 return false;
1697 def1 = SSA_NAME_DEF_STMT (curr->op);
1698 if (!is_gimple_assign (def1))
1699 return false;
1700 lcode = gimple_assign_rhs_code (def1);
1701 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1702 return false;
1703 op1 = gimple_assign_rhs1 (def1);
1704 op2 = gimple_assign_rhs2 (def1);
1706 /* Now look for a similar comparison in the remaining OPS. */
1707 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1709 tree t;
1711 if (TREE_CODE (oe->op) != SSA_NAME)
1712 continue;
1713 def2 = SSA_NAME_DEF_STMT (oe->op);
1714 if (!is_gimple_assign (def2))
1715 continue;
1716 rcode = gimple_assign_rhs_code (def2);
1717 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1718 continue;
1720 /* If we got here, we have a match. See if we can combine the
1721 two comparisons. */
1722 if (opcode == BIT_IOR_EXPR)
1723 t = maybe_fold_or_comparisons (lcode, op1, op2,
1724 rcode, gimple_assign_rhs1 (def2),
1725 gimple_assign_rhs2 (def2));
1726 else
1727 t = maybe_fold_and_comparisons (lcode, op1, op2,
1728 rcode, gimple_assign_rhs1 (def2),
1729 gimple_assign_rhs2 (def2));
1730 if (!t)
1731 continue;
1733 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1734 always give us a boolean_type_node value back. If the original
1735 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1736 we need to convert. */
1737 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1738 t = fold_convert (TREE_TYPE (curr->op), t);
1740 if (TREE_CODE (t) != INTEGER_CST
1741 && !operand_equal_p (t, curr->op, 0))
1743 enum tree_code subcode;
1744 tree newop1, newop2;
1745 if (!COMPARISON_CLASS_P (t))
1746 continue;
1747 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1748 STRIP_USELESS_TYPE_CONVERSION (newop1);
1749 STRIP_USELESS_TYPE_CONVERSION (newop2);
1750 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1751 continue;
1754 if (dump_file && (dump_flags & TDF_DETAILS))
1756 fprintf (dump_file, "Equivalence: ");
1757 print_generic_expr (dump_file, curr->op, 0);
1758 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1759 print_generic_expr (dump_file, oe->op, 0);
1760 fprintf (dump_file, " -> ");
1761 print_generic_expr (dump_file, t, 0);
1762 fprintf (dump_file, "\n");
1765 /* Now we can delete oe, as it has been subsumed by the new combined
1766 expression t. */
1767 ops->ordered_remove (i);
1768 reassociate_stats.ops_eliminated ++;
1770 /* If t is the same as curr->op, we're done. Otherwise we must
1771 	 replace curr->op with t.  The special case is when we get a constant
1772 back, in which case we add it to the end instead of in place of
1773 the current entry. */
1774 if (TREE_CODE (t) == INTEGER_CST)
1776 ops->ordered_remove (currindex);
1777 add_to_ops_vec (ops, t);
1779 else if (!operand_equal_p (t, curr->op, 0))
1781 gimple *sum;
1782 enum tree_code subcode;
1783 tree newop1;
1784 tree newop2;
1785 gcc_assert (COMPARISON_CLASS_P (t));
1786 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1787 STRIP_USELESS_TYPE_CONVERSION (newop1);
1788 STRIP_USELESS_TYPE_CONVERSION (newop2);
1789 gcc_checking_assert (is_gimple_val (newop1)
1790 && is_gimple_val (newop2));
1791 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1792 curr->op = gimple_get_lhs (sum);
1794 return true;
1797 return false;
1801 /* Transform repeated addition of the same value into a multiplication
1802    by a constant.  */
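/* E.g. x + x + x becomes x * 3; for floating-point types this is only
   done under -funsafe-math-optimizations.  */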
1803 static bool
1804 transform_add_to_multiply (vec<operand_entry *> *ops)
1806 operand_entry *oe;
1807 tree op = NULL_TREE;
1808 int j;
1809 int i, start = -1, end = 0, count = 0;
1810 auto_vec<std::pair <int, int> > indxs;
1811 bool changed = false;
1813 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1814 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1815 || !flag_unsafe_math_optimizations))
1816 return false;
1818 /* Look for repeated operands. */
1819 FOR_EACH_VEC_ELT (*ops, i, oe)
1821 if (start == -1)
1823 count = 1;
1824 op = oe->op;
1825 start = i;
1827 else if (operand_equal_p (oe->op, op, 0))
1829 count++;
1830 end = i;
1832 else
1834 if (count > 1)
1835 indxs.safe_push (std::make_pair (start, end));
1836 count = 1;
1837 op = oe->op;
1838 start = i;
1842 if (count > 1)
1843 indxs.safe_push (std::make_pair (start, end));
1845 for (j = indxs.length () - 1; j >= 0; --j)
1847 /* Convert repeated operand addition to multiplication. */
1848 start = indxs[j].first;
1849 end = indxs[j].second;
1850 op = (*ops)[start]->op;
1851 count = end - start + 1;
1852 for (i = end; i >= start; --i)
1853 ops->unordered_remove (i);
1854 tree tmp = make_ssa_name (TREE_TYPE (op));
1855 tree cst = build_int_cst (integer_type_node, count);
1856 gassign *mul_stmt
1857 = gimple_build_assign (tmp, MULT_EXPR,
1858 op, fold_convert (TREE_TYPE (op), cst));
1859 gimple_set_visited (mul_stmt, true);
1860 add_to_ops_vec (ops, tmp, mul_stmt);
1861 changed = true;
1864 return changed;
1868 /* Perform various identities and other optimizations on the list of
1869 operand entries, stored in OPS. The tree code for the binary
1870 operation between all the operands is OPCODE. */
1872 static void
1873 optimize_ops_list (enum tree_code opcode,
1874 vec<operand_entry *> *ops)
1876 unsigned int length = ops->length ();
1877 unsigned int i;
1878 operand_entry *oe;
1879 operand_entry *oelast = NULL;
1880 bool iterate = false;
1882 if (length == 1)
1883 return;
1885 oelast = ops->last ();
1887 /* If the last two are constants, pop the constants off, merge them
1888 and try the next two. */
1889 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1891 operand_entry *oelm1 = (*ops)[length - 2];
1893 if (oelm1->rank == 0
1894 && is_gimple_min_invariant (oelm1->op)
1895 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1896 TREE_TYPE (oelast->op)))
1898 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1899 oelm1->op, oelast->op);
1901 if (folded && is_gimple_min_invariant (folded))
1903 if (dump_file && (dump_flags & TDF_DETAILS))
1904 fprintf (dump_file, "Merging constants\n");
1906 ops->pop ();
1907 ops->pop ();
1909 add_to_ops_vec (ops, folded);
1910 reassociate_stats.constants_eliminated++;
1912 optimize_ops_list (opcode, ops);
1913 return;
1918 eliminate_using_constants (opcode, ops);
1919 oelast = NULL;
1921 for (i = 0; ops->iterate (i, &oe);)
1923 bool done = false;
1925 if (eliminate_not_pairs (opcode, ops, i, oe))
1926 return;
1927 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1928 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1929 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1931 if (done)
1932 return;
1933 iterate = true;
1934 oelast = NULL;
1935 continue;
1937 oelast = oe;
1938 i++;
1941 length = ops->length ();
1942 oelast = ops->last ();
1944 if (iterate)
1945 optimize_ops_list (opcode, ops);
1948 /* The following functions are subroutines to optimize_range_tests and allow
1949 it to try to change a logical combination of comparisons into a range
1950 test.
1952 For example, both
1953 	X == 2 || X == 5 || X == 3 || X == 4
1954    and
1955 	X >= 2 && X <= 5
1956 are converted to
1957 (unsigned) (X - 2) <= 3
1959    For more information see comments above fold_range_test in fold-const.c,
1960 this implementation is for GIMPLE. */
1962 struct range_entry
1964 tree exp;
1965 tree low;
1966 tree high;
1967 bool in_p;
1968 bool strict_overflow_p;
1969 unsigned int idx, next;
1972 /* This is similar to make_range in fold-const.c, but on top of
1973 GIMPLE instead of trees. If EXP is non-NULL, it should be
1974    an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1975    argument should be a GIMPLE_COND.  */
1977 static void
1978 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1980 int in_p;
1981 tree low, high;
1982 bool is_bool, strict_overflow_p;
1984 r->exp = NULL_TREE;
1985 r->in_p = false;
1986 r->strict_overflow_p = false;
1987 r->low = NULL_TREE;
1988 r->high = NULL_TREE;
1989 if (exp != NULL_TREE
1990 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1991 return;
1993 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1994 and see if we can refine the range. Some of the cases below may not
1995 happen, but it doesn't seem worth worrying about this. We "continue"
1996 the outer loop when we've changed something; otherwise we "break"
1997 the switch, which will "break" the while. */
1998 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1999 high = low;
2000 in_p = 0;
2001 strict_overflow_p = false;
2002 is_bool = false;
2003 if (exp == NULL_TREE)
2004 is_bool = true;
2005 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2007 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2008 is_bool = true;
2009 else
2010 return;
2012 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2013 is_bool = true;
2015 while (1)
2017 enum tree_code code;
2018 tree arg0, arg1, exp_type;
2019 tree nexp;
2020 location_t loc;
2022 if (exp != NULL_TREE)
2024 if (TREE_CODE (exp) != SSA_NAME
2025 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2026 break;
2028 stmt = SSA_NAME_DEF_STMT (exp);
2029 if (!is_gimple_assign (stmt))
2030 break;
2032 code = gimple_assign_rhs_code (stmt);
2033 arg0 = gimple_assign_rhs1 (stmt);
2034 arg1 = gimple_assign_rhs2 (stmt);
2035 exp_type = TREE_TYPE (exp);
2037 else
2039 code = gimple_cond_code (stmt);
2040 arg0 = gimple_cond_lhs (stmt);
2041 arg1 = gimple_cond_rhs (stmt);
2042 exp_type = boolean_type_node;
2045 if (TREE_CODE (arg0) != SSA_NAME)
2046 break;
2047 loc = gimple_location (stmt);
2048 switch (code)
2050 case BIT_NOT_EXPR:
2051 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2052 /* Ensure the range is either +[-,0], +[0,0],
2053 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2054 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2055 or similar expression of unconditional true or
2056 false, it should not be negated. */
2057 && ((high && integer_zerop (high))
2058 || (low && integer_onep (low))))
2060 in_p = !in_p;
2061 exp = arg0;
2062 continue;
2064 break;
2065 case SSA_NAME:
2066 exp = arg0;
2067 continue;
2068 CASE_CONVERT:
2069 if (is_bool)
2070 goto do_default;
2071 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2073 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2074 is_bool = true;
2075 else
2076 return;
2078 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2079 is_bool = true;
2080 goto do_default;
2081 case EQ_EXPR:
2082 case NE_EXPR:
2083 case LT_EXPR:
2084 case LE_EXPR:
2085 case GE_EXPR:
2086 case GT_EXPR:
2087 is_bool = true;
2088 /* FALLTHRU */
2089 default:
2090 if (!is_bool)
2091 return;
2092 do_default:
2093 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2094 &low, &high, &in_p,
2095 &strict_overflow_p);
2096 if (nexp != NULL_TREE)
2098 exp = nexp;
2099 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2100 continue;
2102 break;
2104 break;
2106 if (is_bool)
2108 r->exp = exp;
2109 r->in_p = in_p;
2110 r->low = low;
2111 r->high = high;
2112 r->strict_overflow_p = strict_overflow_p;
2116 /* Comparison function for qsort. Sort entries
2117 without SSA_NAME exp first, then with SSA_NAMEs sorted
2118 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2119 by increasing ->low and if ->low is the same, by increasing
2120 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2121 maximum. */
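/* For illustration (hypothetical entries): an entry whose exp is not an
   SSA_NAME sorts before any SSA_NAME entry; among entries for the same
   SSA_NAME, { low = 3, high = 7 } sorts before { low = 10, high = NULL },
   since 3 < 10 and a NULL high stands for the maximum.  */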
2123 static int
2124 range_entry_cmp (const void *a, const void *b)
2126 const struct range_entry *p = (const struct range_entry *) a;
2127 const struct range_entry *q = (const struct range_entry *) b;
2129 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2131 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2133 /* Group range_entries for the same SSA_NAME together. */
2134 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2135 return -1;
2136 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2137 return 1;
2138 /* If ->low is different, NULL low goes first, then by
2139 ascending low. */
2140 if (p->low != NULL_TREE)
2142 if (q->low != NULL_TREE)
2144 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2145 p->low, q->low);
2146 if (tem && integer_onep (tem))
2147 return -1;
2148 tem = fold_binary (GT_EXPR, boolean_type_node,
2149 p->low, q->low);
2150 if (tem && integer_onep (tem))
2151 return 1;
2153 else
2154 return 1;
2156 else if (q->low != NULL_TREE)
2157 return -1;
2158 /* If ->high is different, NULL high goes last, before that by
2159 ascending high. */
2160 if (p->high != NULL_TREE)
2162 if (q->high != NULL_TREE)
2164 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2165 p->high, q->high);
2166 if (tem && integer_onep (tem))
2167 return -1;
2168 tem = fold_binary (GT_EXPR, boolean_type_node,
2169 p->high, q->high);
2170 if (tem && integer_onep (tem))
2171 return 1;
2173 else
2174 return -1;
2176 else if (q->high != NULL_TREE)
2177 return 1;
2178 /* If both ranges are the same, sort below by ascending idx. */
2180 else
2181 return 1;
2183 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2184 return -1;
2186 if (p->idx < q->idx)
2187 return -1;
2188 else
2190 gcc_checking_assert (p->idx > q->idx);
2191 return 1;
2195 /* Helper routine of optimize_range_tests.
2196 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2197 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2198 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2199 is NULL, OTHERRANGEP should not be NULL; it then points to
2200 an array of COUNT pointers to other ranges. Return
2201 true if the range merge has been successful.
2202 If OPCODE is ERROR_MARK, this is called from within
2203 maybe_optimize_range_tests and is performing inter-bb range optimization.
2204 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2205 oe->rank. */
2207 static bool
2208 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2209 struct range_entry **otherrangep,
2210 unsigned int count, enum tree_code opcode,
2211 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2212 bool in_p, tree low, tree high, bool strict_overflow_p)
2214 operand_entry *oe = (*ops)[range->idx];
2215 tree op = oe->op;
2216 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2217 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2218 location_t loc = gimple_location (stmt);
2219 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2220 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2221 in_p, low, high);
2222 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2223 gimple_stmt_iterator gsi;
2224 unsigned int i, uid;
2226 if (tem == NULL_TREE)
2227 return false;
2229 /* If op is a default def SSA_NAME, there is no place to insert the
2230 new comparison. Give up, unless we can use OP itself as the
2231 range test. */
2232 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2234 if (op == range->exp
2235 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2236 || TREE_CODE (optype) == BOOLEAN_TYPE)
2237 && (op == tem
2238 || (TREE_CODE (tem) == EQ_EXPR
2239 && TREE_OPERAND (tem, 0) == op
2240 && integer_onep (TREE_OPERAND (tem, 1))))
2241 && opcode != BIT_IOR_EXPR
2242 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2244 stmt = NULL;
2245 tem = op;
2247 else
2248 return false;
2251 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2252 warning_at (loc, OPT_Wstrict_overflow,
2253 "assuming signed overflow does not occur "
2254 "when simplifying range test");
2256 if (dump_file && (dump_flags & TDF_DETAILS))
2258 struct range_entry *r;
2259 fprintf (dump_file, "Optimizing range tests ");
2260 print_generic_expr (dump_file, range->exp, 0);
2261 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2262 print_generic_expr (dump_file, range->low, 0);
2263 fprintf (dump_file, ", ");
2264 print_generic_expr (dump_file, range->high, 0);
2265 fprintf (dump_file, "]");
2266 for (i = 0; i < count; i++)
2268 if (otherrange)
2269 r = otherrange + i;
2270 else
2271 r = otherrangep[i];
2272 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2273 print_generic_expr (dump_file, r->low, 0);
2274 fprintf (dump_file, ", ");
2275 print_generic_expr (dump_file, r->high, 0);
2276 fprintf (dump_file, "]");
2278 fprintf (dump_file, "\n into ");
2279 print_generic_expr (dump_file, tem, 0);
2280 fprintf (dump_file, "\n");
2283 if (opcode == BIT_IOR_EXPR
2284 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2285 tem = invert_truthvalue_loc (loc, tem);
2287 tem = fold_convert_loc (loc, optype, tem);
2288 if (stmt)
2290 gsi = gsi_for_stmt (stmt);
2291 uid = gimple_uid (stmt);
2293 else
2295 gsi = gsi_none ();
2296 uid = 0;
2298 if (stmt == NULL)
2299 gcc_checking_assert (tem == op);
2300 /* In rare cases range->exp can be equal to lhs of stmt.
2301 In that case we have to insert after the stmt rather than before
2302 it. If stmt is a PHI, insert it at the start of the basic block. */
2303 else if (op != range->exp)
2305 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2306 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2307 GSI_SAME_STMT);
2308 gsi_prev (&gsi);
2310 else if (gimple_code (stmt) != GIMPLE_PHI)
2312 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2313 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2314 GSI_CONTINUE_LINKING);
2316 else
2318 gsi = gsi_after_labels (gimple_bb (stmt));
2319 if (!gsi_end_p (gsi))
2320 uid = gimple_uid (gsi_stmt (gsi));
2321 else
2323 gsi = gsi_start_bb (gimple_bb (stmt));
2324 uid = 1;
2325 while (!gsi_end_p (gsi))
2327 uid = gimple_uid (gsi_stmt (gsi));
2328 gsi_next (&gsi);
2331 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2332 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2333 GSI_SAME_STMT);
2334 if (gsi_end_p (gsi))
2335 gsi = gsi_last_bb (gimple_bb (stmt));
2336 else
2337 gsi_prev (&gsi);
2339 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2340 if (gimple_uid (gsi_stmt (gsi)))
2341 break;
2342 else
2343 gimple_set_uid (gsi_stmt (gsi), uid);
2345 oe->op = tem;
2346 range->exp = exp;
2347 range->low = low;
2348 range->high = high;
2349 range->in_p = in_p;
2350 range->strict_overflow_p = false;
2352 for (i = 0; i < count; i++)
2354 if (otherrange)
2355 range = otherrange + i;
2356 else
2357 range = otherrangep[i];
2358 oe = (*ops)[range->idx];
2359 /* Now change all the other range test immediate uses, so that
2360 those tests will be optimized away. */
2361 if (opcode == ERROR_MARK)
2363 if (oe->op)
2364 oe->op = build_int_cst (TREE_TYPE (oe->op),
2365 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2366 else
2367 oe->op = (oe->rank == BIT_IOR_EXPR
2368 ? boolean_false_node : boolean_true_node);
2370 else
2371 oe->op = error_mark_node;
2372 range->exp = NULL_TREE;
2374 return true;
2377 /* Optimize X == CST1 || X == CST2
2378 if popcount (CST1 ^ CST2) == 1 into
2379 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2380 Similarly for ranges. E.g.
2381 X != 2 && X != 3 && X != 10 && X != 11
2382 will be transformed by the previous optimization into
2383 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2384 and this loop can transform that into
2385 !(((X & ~8) - 2U) <= 1U). */
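/* Worked example with hypothetical constants: for  X == 4 || X == 6
   we have 4 ^ 6 == 2 and popcount (2) == 1, so the test becomes
   (X & ~2) == 4, which is true for exactly the values 4 and 6.  */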
2387 static bool
2388 optimize_range_tests_xor (enum tree_code opcode, tree type,
2389 tree lowi, tree lowj, tree highi, tree highj,
2390 vec<operand_entry *> *ops,
2391 struct range_entry *rangei,
2392 struct range_entry *rangej)
2394 tree lowxor, highxor, tem, exp;
2395 /* Check lowi ^ lowj == highi ^ highj and
2396 popcount (lowi ^ lowj) == 1. */
2397 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2398 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2399 return false;
2400 if (!integer_pow2p (lowxor))
2401 return false;
2402 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2403 if (!tree_int_cst_equal (lowxor, highxor))
2404 return false;
2406 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2407 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2408 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2409 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2410 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2411 NULL, rangei->in_p, lowj, highj,
2412 rangei->strict_overflow_p
2413 || rangej->strict_overflow_p))
2414 return true;
2415 return false;
2418 /* Optimize X == CST1 || X == CST2
2419 if popcount (CST2 - CST1) == 1 into
2420 ((X - CST1) & ~(CST2 - CST1)) == 0.
2421 Similarly for ranges. E.g.
2422 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2423 || X == 75 || X == 45
2424 will be transformed by the previous optimization into
2425 (X - 43U) <= 3U || (X - 75U) <= 3U
2426 and this loop can transform that into
2427 ((X - 43U) & ~(75U - 43U)) <= 3U. */
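/* Worked example with hypothetical constants: for  X == 5 || X == 13
   we have 13 - 5 == 8 and popcount (8) == 1, so the test becomes
   ((X - 5U) & ~8U) == 0, which is true for exactly the values 5 and 13.  */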
2428 static bool
2429 optimize_range_tests_diff (enum tree_code opcode, tree type,
2430 tree lowi, tree lowj, tree highi, tree highj,
2431 vec<operand_entry *> *ops,
2432 struct range_entry *rangei,
2433 struct range_entry *rangej)
2435 tree tem1, tem2, mask;
2436 /* Check highi - lowi == highj - lowj. */
2437 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2438 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2439 return false;
2440 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2441 if (!tree_int_cst_equal (tem1, tem2))
2442 return false;
2443 /* Check popcount (lowj - lowi) == 1. */
2444 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2445 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2446 return false;
2447 if (!integer_pow2p (tem1))
2448 return false;
2450 type = unsigned_type_for (type);
2451 tem1 = fold_convert (type, tem1);
2452 tem2 = fold_convert (type, tem2);
2453 lowi = fold_convert (type, lowi);
2454 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2455 tem1 = fold_binary (MINUS_EXPR, type,
2456 fold_convert (type, rangei->exp), lowi);
2457 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2458 lowj = build_int_cst (type, 0);
2459 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2460 NULL, rangei->in_p, lowj, tem2,
2461 rangei->strict_overflow_p
2462 || rangej->strict_overflow_p))
2463 return true;
2464 return false;
2467 /* Perform the common checks for optimize_range_tests_xor and
2468 optimize_range_tests_diff and dispatch to one of them:
2469 if OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2470 otherwise call optimize_range_tests_diff. */
2472 static bool
2473 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2474 bool optimize_xor, vec<operand_entry *> *ops,
2475 struct range_entry *ranges)
2477 int i, j;
2478 bool any_changes = false;
2479 for (i = first; i < length; i++)
2481 tree lowi, highi, lowj, highj, type, tem;
2483 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2484 continue;
2485 type = TREE_TYPE (ranges[i].exp);
2486 if (!INTEGRAL_TYPE_P (type))
2487 continue;
2488 lowi = ranges[i].low;
2489 if (lowi == NULL_TREE)
2490 lowi = TYPE_MIN_VALUE (type);
2491 highi = ranges[i].high;
2492 if (highi == NULL_TREE)
2493 continue;
2494 for (j = i + 1; j < length && j < i + 64; j++)
2496 bool changes;
2497 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2498 continue;
2499 lowj = ranges[j].low;
2500 if (lowj == NULL_TREE)
2501 continue;
2502 highj = ranges[j].high;
2503 if (highj == NULL_TREE)
2504 highj = TYPE_MAX_VALUE (type);
2505 /* Check lowj > highi. */
2506 tem = fold_binary (GT_EXPR, boolean_type_node,
2507 lowj, highi);
2508 if (tem == NULL_TREE || !integer_onep (tem))
2509 continue;
2510 if (optimize_xor)
2511 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2512 highi, highj, ops,
2513 ranges + i, ranges + j);
2514 else
2515 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2516 highi, highj, ops,
2517 ranges + i, ranges + j);
2518 if (changes)
2520 any_changes = true;
2521 break;
2525 return any_changes;
2528 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2529 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2530 EXP on success, NULL otherwise. */
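/* For illustration (hypothetical values): with PREC == 64,
   TOTALLOW == LOW == 43 and HIGH == 46 the range covers four values,
   so *MASK becomes 0xf (bits 0-3) and EXP is returned unchanged.
   For a later range with LOW == 75 and HIGH == 78 (TOTALLOWP == NULL,
   TOTALLOW still 43), the same four-bit mask is shifted left by
   75 - 43 == 32 bits.  */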
2532 static tree
2533 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2534 wide_int *mask, tree *totallowp)
2536 tree tem = int_const_binop (MINUS_EXPR, high, low);
2537 if (tem == NULL_TREE
2538 || TREE_CODE (tem) != INTEGER_CST
2539 || TREE_OVERFLOW (tem)
2540 || tree_int_cst_sgn (tem) == -1
2541 || compare_tree_int (tem, prec) != -1)
2542 return NULL_TREE;
2544 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2545 *mask = wi::shifted_mask (0, max, false, prec);
2546 if (TREE_CODE (exp) == BIT_AND_EXPR
2547 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2549 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2550 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2551 if (wi::popcount (msk) == 1
2552 && wi::ltu_p (msk, prec - max))
2554 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2555 max += msk.to_uhwi ();
2556 exp = TREE_OPERAND (exp, 0);
2557 if (integer_zerop (low)
2558 && TREE_CODE (exp) == PLUS_EXPR
2559 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2561 tree ret = TREE_OPERAND (exp, 0);
2562 STRIP_NOPS (ret);
2563 widest_int bias
2564 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2565 TYPE_PRECISION (TREE_TYPE (low))));
2566 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2567 if (totallowp)
2569 *totallowp = tbias;
2570 return ret;
2572 else if (!tree_int_cst_lt (totallow, tbias))
2573 return NULL_TREE;
2574 bias = wi::to_widest (tbias);
2575 bias -= wi::to_widest (totallow);
2576 if (bias >= 0 && bias < prec - max)
2578 *mask = wi::lshift (*mask, bias);
2579 return ret;
2584 if (totallowp)
2585 return exp;
2586 if (!tree_int_cst_lt (totallow, low))
2587 return exp;
2588 tem = int_const_binop (MINUS_EXPR, low, totallow);
2589 if (tem == NULL_TREE
2590 || TREE_CODE (tem) != INTEGER_CST
2591 || TREE_OVERFLOW (tem)
2592 || compare_tree_int (tem, prec - max) == 1)
2593 return NULL_TREE;
2595 *mask = wi::lshift (*mask, wi::to_widest (tem));
2596 return exp;
2599 /* Attempt to optimize small range tests using bit test.
2600 E.g.
2601 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2602 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2603 has been optimized by earlier optimizations into:
2604 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2605 As the whole 43 through 82 range spans fewer than 64 values,
2606 for 64-bit word targets optimize that into:
2607 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
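/* For illustration, the mask above sets, for each tested value V,
   bit (V - 43): values 43-46 and 49 give bits 0-3 and 6 (0x4f),
   values 75-78 and 82 give bits 32-35 and 39 (0x8f00000000),
   together 0x8F0000004FULL.  */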
2609 static bool
2610 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2611 vec<operand_entry *> *ops,
2612 struct range_entry *ranges)
2614 int i, j;
2615 bool any_changes = false;
2616 int prec = GET_MODE_BITSIZE (word_mode);
2617 auto_vec<struct range_entry *, 64> candidates;
2619 for (i = first; i < length - 2; i++)
2621 tree lowi, highi, lowj, highj, type;
2623 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2624 continue;
2625 type = TREE_TYPE (ranges[i].exp);
2626 if (!INTEGRAL_TYPE_P (type))
2627 continue;
2628 lowi = ranges[i].low;
2629 if (lowi == NULL_TREE)
2630 lowi = TYPE_MIN_VALUE (type);
2631 highi = ranges[i].high;
2632 if (highi == NULL_TREE)
2633 continue;
2634 wide_int mask;
2635 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2636 highi, &mask, &lowi);
2637 if (exp == NULL_TREE)
2638 continue;
2639 bool strict_overflow_p = ranges[i].strict_overflow_p;
2640 candidates.truncate (0);
2641 int end = MIN (i + 64, length);
2642 for (j = i + 1; j < end; j++)
2644 tree exp2;
2645 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2646 continue;
2647 if (ranges[j].exp == exp)
2649 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2651 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2652 if (exp2 == exp)
2654 else if (TREE_CODE (exp2) == PLUS_EXPR)
2656 exp2 = TREE_OPERAND (exp2, 0);
2657 STRIP_NOPS (exp2);
2658 if (exp2 != exp)
2659 continue;
2661 else
2662 continue;
2664 else
2665 continue;
2666 lowj = ranges[j].low;
2667 if (lowj == NULL_TREE)
2668 continue;
2669 highj = ranges[j].high;
2670 if (highj == NULL_TREE)
2671 highj = TYPE_MAX_VALUE (type);
2672 wide_int mask2;
2673 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2674 highj, &mask2, NULL);
2675 if (exp2 != exp)
2676 continue;
2677 mask |= mask2;
2678 strict_overflow_p |= ranges[j].strict_overflow_p;
2679 candidates.safe_push (&ranges[j]);
2682 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2683 if (candidates.length () >= 2)
2685 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2686 wi::to_widest (lowi)
2687 + prec - 1 - wi::clz (mask));
2688 operand_entry *oe = (*ops)[ranges[i].idx];
2689 tree op = oe->op;
2690 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2691 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2692 location_t loc = gimple_location (stmt);
2693 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2695 /* See if it isn't cheaper to pretend the minimum value of the
2696 range is 0, if the maximum value is small enough.
2697 We can then avoid the subtraction of the minimum value, but the
2698 mask constant could perhaps be more expensive.
2699 if (compare_tree_int (lowi, 0) > 0
2700 && compare_tree_int (high, prec) < 0)
2702 int cost_diff;
2703 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2704 rtx reg = gen_raw_REG (word_mode, 10000);
2705 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2706 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2707 GEN_INT (-m)), speed_p);
2708 rtx r = immed_wide_int_const (mask, word_mode);
2709 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2710 word_mode, speed_p);
2711 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2712 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2713 word_mode, speed_p);
2714 if (cost_diff > 0)
2716 mask = wi::lshift (mask, m);
2717 lowi = build_zero_cst (TREE_TYPE (lowi));
2721 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2722 false, lowi, high);
2723 if (tem == NULL_TREE || is_gimple_val (tem))
2724 continue;
2725 tree etype = unsigned_type_for (TREE_TYPE (exp));
2726 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2727 fold_convert_loc (loc, etype, exp),
2728 fold_convert_loc (loc, etype, lowi));
2729 exp = fold_convert_loc (loc, integer_type_node, exp);
2730 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2731 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2732 build_int_cst (word_type, 1), exp);
2733 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2734 wide_int_to_tree (word_type, mask));
2735 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2736 build_zero_cst (word_type));
2737 if (is_gimple_val (exp))
2738 continue;
2740 /* The shift might have undefined behavior if TEM is true,
2741 but reassociate_bb isn't prepared to have basic blocks
2742 split when it is running. So, temporarily emit the test using
2743 BIT_IOR_EXPR instead of a short-circuit form, and fix it up in
2744 branch_fixup. */
2745 gimple_seq seq;
2746 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2747 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2748 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2749 gimple_seq seq2;
2750 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2751 gimple_seq_add_seq_without_update (&seq, seq2);
2752 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2753 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2754 gimple *g = gimple_build_assign (make_ssa_name (optype),
2755 BIT_IOR_EXPR, tem, exp);
2756 gimple_set_location (g, loc);
2757 gimple_seq_add_stmt_without_update (&seq, g);
2758 exp = gimple_assign_lhs (g);
2759 tree val = build_zero_cst (optype);
2760 if (update_range_test (&ranges[i], NULL, candidates.address (),
2761 candidates.length (), opcode, ops, exp,
2762 seq, false, val, val, strict_overflow_p))
2764 any_changes = true;
2765 reassoc_branch_fixups.safe_push (tem);
2767 else
2768 gimple_seq_discard (seq);
2771 return any_changes;
2774 /* Optimize range tests, similarly to how fold_range_test optimizes
2775 them on trees. The tree code for the binary
2776 operation between all the operands is OPCODE.
2777 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2778 maybe_optimize_range_tests for inter-bb range optimization.
2779 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2780 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2781 the actual opcode. */
2783 static bool
2784 optimize_range_tests (enum tree_code opcode,
2785 vec<operand_entry *> *ops)
2787 unsigned int length = ops->length (), i, j, first;
2788 operand_entry *oe;
2789 struct range_entry *ranges;
2790 bool any_changes = false;
2792 if (length == 1)
2793 return false;
2795 ranges = XNEWVEC (struct range_entry, length);
2796 for (i = 0; i < length; i++)
2798 oe = (*ops)[i];
2799 ranges[i].idx = i;
2800 init_range_entry (ranges + i, oe->op,
2801 oe->op
2802 ? NULL
2803 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2804 /* For |, invert it now; we will invert it again before emitting
2805 the optimized expression. */
2806 if (opcode == BIT_IOR_EXPR
2807 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2808 ranges[i].in_p = !ranges[i].in_p;
2811 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2812 for (i = 0; i < length; i++)
2813 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2814 break;
2816 /* Try to merge ranges. */
2817 for (first = i; i < length; i++)
2819 tree low = ranges[i].low;
2820 tree high = ranges[i].high;
2821 int in_p = ranges[i].in_p;
2822 bool strict_overflow_p = ranges[i].strict_overflow_p;
2823 int update_fail_count = 0;
2825 for (j = i + 1; j < length; j++)
2827 if (ranges[i].exp != ranges[j].exp)
2828 break;
2829 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2830 ranges[j].in_p, ranges[j].low, ranges[j].high))
2831 break;
2832 strict_overflow_p |= ranges[j].strict_overflow_p;
2835 if (j == i + 1)
2836 continue;
2838 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2839 opcode, ops, ranges[i].exp, NULL, in_p,
2840 low, high, strict_overflow_p))
2842 i = j - 1;
2843 any_changes = true;
2845 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2846 while update_range_test would fail. */
2847 else if (update_fail_count == 64)
2848 i = j - 1;
2849 else
2850 ++update_fail_count;
2853 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2854 ops, ranges);
2856 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2857 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2858 ops, ranges);
2859 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2860 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2861 ops, ranges);
2863 if (any_changes && opcode != ERROR_MARK)
2865 j = 0;
2866 FOR_EACH_VEC_ELT (*ops, i, oe)
2868 if (oe->op == error_mark_node)
2869 continue;
2870 else if (i != j)
2871 (*ops)[j] = oe;
2872 j++;
2874 ops->truncate (j);
2877 XDELETEVEC (ranges);
2878 return any_changes;
2881 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2882 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2883 otherwise the comparison code. */
2885 static tree_code
2886 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2888 if (TREE_CODE (var) != SSA_NAME)
2889 return ERROR_MARK;
2891 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2892 if (stmt == NULL)
2893 return ERROR_MARK;
2895 /* ??? If we start creating more COND_EXPR, we could perform
2896 this same optimization with them. For now, simplify. */
2897 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2898 return ERROR_MARK;
2900 tree cond = gimple_assign_rhs1 (stmt);
2901 tree_code cmp = TREE_CODE (cond);
2902 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2903 return ERROR_MARK;
2905 /* ??? For now, allow only canonical true and false result vectors.
2906 We could expand this to other constants should the need arise,
2907 but at the moment we don't create them. */
2908 tree t = gimple_assign_rhs2 (stmt);
2909 tree f = gimple_assign_rhs3 (stmt);
2910 bool inv;
2911 if (integer_all_onesp (t))
2912 inv = false;
2913 else if (integer_all_onesp (f))
2915 cmp = invert_tree_comparison (cmp, false);
2916 inv = true;
2918 else
2919 return ERROR_MARK;
2920 if (!integer_zerop (f))
2921 return ERROR_MARK;
2923 /* Success! */
2924 if (rets)
2925 *rets = stmt;
2926 if (reti)
2927 *reti = inv;
2928 return cmp;
2931 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2932 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
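/* For illustration (hypothetical GIMPLE): given
     m_1 = VEC_COND_EXPR <x_2 <= y_3, { -1, ... }, { 0, ... }>;
     m_4 = VEC_COND_EXPR <x_2 < y_3, { -1, ... }, { 0, ... }>;
     m_5 = m_1 & m_4;
   the two conditions can be folded into the single comparison
   x_2 < y_3, so m_1's condition is replaced and m_4 drops out of
   the operand list.  */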
2934 static bool
2935 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2937 unsigned int length = ops->length (), i, j;
2938 bool any_changes = false;
2940 if (length == 1)
2941 return false;
2943 for (i = 0; i < length; ++i)
2945 tree elt0 = (*ops)[i]->op;
2947 gassign *stmt0;
2948 bool invert;
2949 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2950 if (cmp0 == ERROR_MARK)
2951 continue;
2953 for (j = i + 1; j < length; ++j)
2955 tree &elt1 = (*ops)[j]->op;
2957 gassign *stmt1;
2958 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2959 if (cmp1 == ERROR_MARK)
2960 continue;
2962 tree cond0 = gimple_assign_rhs1 (stmt0);
2963 tree x0 = TREE_OPERAND (cond0, 0);
2964 tree y0 = TREE_OPERAND (cond0, 1);
2966 tree cond1 = gimple_assign_rhs1 (stmt1);
2967 tree x1 = TREE_OPERAND (cond1, 0);
2968 tree y1 = TREE_OPERAND (cond1, 1);
2970 tree comb;
2971 if (opcode == BIT_AND_EXPR)
2972 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2973 else if (opcode == BIT_IOR_EXPR)
2974 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2975 else
2976 gcc_unreachable ();
2977 if (comb == NULL)
2978 continue;
2980 /* Success! */
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2983 fprintf (dump_file, "Transforming ");
2984 print_generic_expr (dump_file, cond0, 0);
2985 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2986 print_generic_expr (dump_file, cond1, 0);
2987 fprintf (dump_file, " into ");
2988 print_generic_expr (dump_file, comb, 0);
2989 fputc ('\n', dump_file);
2992 gimple_assign_set_rhs1 (stmt0, comb);
2993 if (invert)
2994 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2995 *gimple_assign_rhs3_ptr (stmt0));
2996 update_stmt (stmt0);
2998 elt1 = error_mark_node;
2999 any_changes = true;
3003 if (any_changes)
3005 operand_entry *oe;
3006 j = 0;
3007 FOR_EACH_VEC_ELT (*ops, i, oe)
3009 if (oe->op == error_mark_node)
3010 continue;
3011 else if (i != j)
3012 (*ops)[j] = oe;
3013 j++;
3015 ops->truncate (j);
3018 return any_changes;
3021 /* Return true if STMT is a cast like:
3022 <bb N>:
3024 _123 = (int) _234;
3026 <bb M>:
3027 # _345 = PHI <_123(N), 1(...), 1(...)>
3028 where _234 has bool type, _123 has a single use and
3029 bb N has a single successor M. This is commonly used in
3030 the last block of a range test. */
3032 static bool
3033 final_range_test_p (gimple *stmt)
3035 basic_block bb, rhs_bb;
3036 edge e;
3037 tree lhs, rhs;
3038 use_operand_p use_p;
3039 gimple *use_stmt;
3041 if (!gimple_assign_cast_p (stmt))
3042 return false;
3043 bb = gimple_bb (stmt);
3044 if (!single_succ_p (bb))
3045 return false;
3046 e = single_succ_edge (bb);
3047 if (e->flags & EDGE_COMPLEX)
3048 return false;
3050 lhs = gimple_assign_lhs (stmt);
3051 rhs = gimple_assign_rhs1 (stmt);
3052 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3053 || TREE_CODE (rhs) != SSA_NAME
3054 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3055 return false;
3057 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3058 if (!single_imm_use (lhs, &use_p, &use_stmt))
3059 return false;
3061 if (gimple_code (use_stmt) != GIMPLE_PHI
3062 || gimple_bb (use_stmt) != e->dest)
3063 return false;
3065 /* And that the rhs is defined in the same loop. */
3066 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3067 if (rhs_bb == NULL
3068 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3069 return false;
3071 return true;
3074 /* Return true if BB is suitable basic block for inter-bb range test
3075 optimization. If BACKWARD is true, BB should be the only predecessor
3076 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3077 or is compared against to find a common basic block to which all the
3078 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
3079 be the only predecessor of BB. */
3081 static bool
3082 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3083 bool backward)
3085 edge_iterator ei, ei2;
3086 edge e, e2;
3087 gimple *stmt;
3088 gphi_iterator gsi;
3089 bool other_edge_seen = false;
3090 bool is_cond;
3092 if (test_bb == bb)
3093 return false;
3094 /* Check last stmt first. */
3095 stmt = last_stmt (bb);
3096 if (stmt == NULL
3097 || (gimple_code (stmt) != GIMPLE_COND
3098 && (backward || !final_range_test_p (stmt)))
3099 || gimple_visited_p (stmt)
3100 || stmt_could_throw_p (stmt)
3101 || *other_bb == bb)
3102 return false;
3103 is_cond = gimple_code (stmt) == GIMPLE_COND;
3104 if (is_cond)
3106 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3107 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3108 to *OTHER_BB (if not set yet, try to find it out). */
3109 if (EDGE_COUNT (bb->succs) != 2)
3110 return false;
3111 FOR_EACH_EDGE (e, ei, bb->succs)
3113 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3114 return false;
3115 if (e->dest == test_bb)
3117 if (backward)
3118 continue;
3119 else
3120 return false;
3122 if (e->dest == bb)
3123 return false;
3124 if (*other_bb == NULL)
3126 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3127 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3128 return false;
3129 else if (e->dest == e2->dest)
3130 *other_bb = e->dest;
3131 if (*other_bb == NULL)
3132 return false;
3134 if (e->dest == *other_bb)
3135 other_edge_seen = true;
3136 else if (backward)
3137 return false;
3139 if (*other_bb == NULL || !other_edge_seen)
3140 return false;
3142 else if (single_succ (bb) != *other_bb)
3143 return false;
3145 /* Now check all PHIs of *OTHER_BB. */
3146 e = find_edge (bb, *other_bb);
3147 e2 = find_edge (test_bb, *other_bb);
3148 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3150 gphi *phi = gsi.phi ();
3151 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3152 corresponding to BB and TEST_BB predecessor must be the same. */
3153 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3154 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3156 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3157 one of the PHIs should have the lhs of the last stmt in
3158 that block as PHI arg and that PHI should have 0 or 1
3159 corresponding to it in all other range test basic blocks
3160 considered. */
3161 if (!is_cond)
3163 if (gimple_phi_arg_def (phi, e->dest_idx)
3164 == gimple_assign_lhs (stmt)
3165 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3166 || integer_onep (gimple_phi_arg_def (phi,
3167 e2->dest_idx))))
3168 continue;
3170 else
3172 gimple *test_last = last_stmt (test_bb);
3173 if (gimple_code (test_last) != GIMPLE_COND
3174 && gimple_phi_arg_def (phi, e2->dest_idx)
3175 == gimple_assign_lhs (test_last)
3176 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3177 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3178 continue;
3181 return false;
3184 return true;
3187 /* Return true if BB doesn't have side-effects that would disallow
3188 range test optimization, all SSA_NAMEs set in the bb are consumed
3189 in the bb and there are no PHIs. */
3191 static bool
3192 no_side_effect_bb (basic_block bb)
3194 gimple_stmt_iterator gsi;
3195 gimple *last;
3197 if (!gimple_seq_empty_p (phi_nodes (bb)))
3198 return false;
3199 last = last_stmt (bb);
3200 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3202 gimple *stmt = gsi_stmt (gsi);
3203 tree lhs;
3204 imm_use_iterator imm_iter;
3205 use_operand_p use_p;
3207 if (is_gimple_debug (stmt))
3208 continue;
3209 if (gimple_has_side_effects (stmt))
3210 return false;
3211 if (stmt == last)
3212 return true;
3213 if (!is_gimple_assign (stmt))
3214 return false;
3215 lhs = gimple_assign_lhs (stmt);
3216 if (TREE_CODE (lhs) != SSA_NAME)
3217 return false;
3218 if (gimple_assign_rhs_could_trap_p (stmt))
3219 return false;
3220 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3222 gimple *use_stmt = USE_STMT (use_p);
3223 if (is_gimple_debug (use_stmt))
3224 continue;
3225 if (gimple_bb (use_stmt) != bb)
3226 return false;
3229 return false;
3232 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3233 return true and fill in *OPS recursively. */
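/* For illustration (hypothetical GIMPLE): with CODE == BIT_IOR_EXPR and
     _7 = _1 | _2;
     _9 = _7 | _3;
   a call with VAR == _9 fills *OPS with _1, _2 and _3, provided the
   intermediate results and leaves each have a single use.  */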
3235 static bool
3236 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3237 struct loop *loop)
3239 gimple *stmt = SSA_NAME_DEF_STMT (var);
3240 tree rhs[2];
3241 int i;
3243 if (!is_reassociable_op (stmt, code, loop))
3244 return false;
3246 rhs[0] = gimple_assign_rhs1 (stmt);
3247 rhs[1] = gimple_assign_rhs2 (stmt);
3248 gimple_set_visited (stmt, true);
3249 for (i = 0; i < 2; i++)
3250 if (TREE_CODE (rhs[i]) == SSA_NAME
3251 && !get_ops (rhs[i], code, ops, loop)
3252 && has_single_use (rhs[i]))
3254 operand_entry *oe = operand_entry_pool.allocate ();
3256 oe->op = rhs[i];
3257 oe->rank = code;
3258 oe->id = 0;
3259 oe->count = 1;
3260 oe->stmt_to_insert = NULL;
3261 ops->safe_push (oe);
3263 return true;
3266 /* Find the ops that were added by get_ops starting from VAR, see if
3267 they were changed during update_range_test and if so, create new
3268 stmts. */
3270 static tree
3271 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3272 unsigned int *pidx, struct loop *loop)
3274 gimple *stmt = SSA_NAME_DEF_STMT (var);
3275 tree rhs[4];
3276 int i;
3278 if (!is_reassociable_op (stmt, code, loop))
3279 return NULL;
3281 rhs[0] = gimple_assign_rhs1 (stmt);
3282 rhs[1] = gimple_assign_rhs2 (stmt);
3283 rhs[2] = rhs[0];
3284 rhs[3] = rhs[1];
3285 for (i = 0; i < 2; i++)
3286 if (TREE_CODE (rhs[i]) == SSA_NAME)
3288 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3289 if (rhs[2 + i] == NULL_TREE)
3291 if (has_single_use (rhs[i]))
3292 rhs[2 + i] = ops[(*pidx)++]->op;
3293 else
3294 rhs[2 + i] = rhs[i];
3297 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3298 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3300 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3301 var = make_ssa_name (TREE_TYPE (var));
3302 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3303 rhs[2], rhs[3]);
3304 gimple_set_uid (g, gimple_uid (stmt));
3305 gimple_set_visited (g, true);
3306 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3308 return var;
3311 /* Structure to track the initial value passed to get_ops and
3312 the range in the ops vector for each basic block. */
3314 struct inter_bb_range_test_entry
3316 tree op;
3317 unsigned int first_idx, last_idx;
3320 /* Inter-bb range test optimization.
3322 Returns TRUE if a gimple conditional is optimized to a constant true/false,
3323 otherwise returns FALSE.
3325 This indicates to the caller that it should run a CFG cleanup pass
3326 once reassociation is completed. */
3328 static bool
3329 maybe_optimize_range_tests (gimple *stmt)
3331 basic_block first_bb = gimple_bb (stmt);
3332 basic_block last_bb = first_bb;
3333 basic_block other_bb = NULL;
3334 basic_block bb;
3335 edge_iterator ei;
3336 edge e;
3337 auto_vec<operand_entry *> ops;
3338 auto_vec<inter_bb_range_test_entry> bbinfo;
3339 bool any_changes = false;
3340 bool cfg_cleanup_needed = false;
3342 /* Consider only basic blocks that end with GIMPLE_COND or
3343 a cast statement satisfying final_range_test_p. All
3344 but the last bb in the first_bb .. last_bb range
3345 should end with GIMPLE_COND. */
3346 if (gimple_code (stmt) == GIMPLE_COND)
3348 if (EDGE_COUNT (first_bb->succs) != 2)
3349 return cfg_cleanup_needed;
3351 else if (final_range_test_p (stmt))
3352 other_bb = single_succ (first_bb);
3353 else
3354 return cfg_cleanup_needed;
3356 if (stmt_could_throw_p (stmt))
3357 return cfg_cleanup_needed;
3359 /* As relative ordering of post-dominator sons isn't fixed,
3360 maybe_optimize_range_tests can be called first on any
3361 bb in the range we want to optimize. So, start searching
3362 backwards, if first_bb can be set to a predecessor. */
3363 while (single_pred_p (first_bb))
3365 basic_block pred_bb = single_pred (first_bb);
3366 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3367 break;
3368 if (!no_side_effect_bb (first_bb))
3369 break;
3370 first_bb = pred_bb;
3372 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3373 Before starting forward search in last_bb successors, find
3374 out the other_bb. */
3375 if (first_bb == last_bb)
3377 other_bb = NULL;
3378 /* As non-GIMPLE_COND last stmt always terminates the range,
3379 if forward search didn't discover anything, just give up. */
3380 if (gimple_code (stmt) != GIMPLE_COND)
3381 return cfg_cleanup_needed;
3382 /* Look at both successors. Either it ends with a GIMPLE_COND
3383 and satisfies suitable_cond_bb, or ends with a cast and
3384 other_bb is that cast's successor. */
3385 FOR_EACH_EDGE (e, ei, first_bb->succs)
3386 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3387 || e->dest == first_bb)
3388 return cfg_cleanup_needed;
3389 else if (single_pred_p (e->dest))
3391 stmt = last_stmt (e->dest);
3392 if (stmt
3393 && gimple_code (stmt) == GIMPLE_COND
3394 && EDGE_COUNT (e->dest->succs) == 2)
3396 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3397 break;
3398 else
3399 other_bb = NULL;
3401 else if (stmt
3402 && final_range_test_p (stmt)
3403 && find_edge (first_bb, single_succ (e->dest)))
3405 other_bb = single_succ (e->dest);
3406 if (other_bb == first_bb)
3407 other_bb = NULL;
3410 if (other_bb == NULL)
3411 return cfg_cleanup_needed;
3413 /* Now do the forward search, moving last_bb to successor bbs
3414 that aren't other_bb. */
3415 while (EDGE_COUNT (last_bb->succs) == 2)
3417 FOR_EACH_EDGE (e, ei, last_bb->succs)
3418 if (e->dest != other_bb)
3419 break;
3420 if (e == NULL)
3421 break;
3422 if (!single_pred_p (e->dest))
3423 break;
3424 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3425 break;
3426 if (!no_side_effect_bb (e->dest))
3427 break;
3428 last_bb = e->dest;
3430 if (first_bb == last_bb)
3431 return cfg_cleanup_needed;
3432 /* Here basic blocks first_bb through last_bb's predecessor
3433 end with GIMPLE_COND, all of them have one of the edges to
3434 other_bb and another to another block in the range,
3435 all blocks except first_bb don't have side-effects and
3436 last_bb ends with either GIMPLE_COND, or cast satisfying
3437 final_range_test_p. */
3438 for (bb = last_bb; ; bb = single_pred (bb))
3440 enum tree_code code;
3441 tree lhs, rhs;
3442 inter_bb_range_test_entry bb_ent;
3444 bb_ent.op = NULL_TREE;
3445 bb_ent.first_idx = ops.length ();
3446 bb_ent.last_idx = bb_ent.first_idx;
3447 e = find_edge (bb, other_bb);
3448 stmt = last_stmt (bb);
3449 gimple_set_visited (stmt, true);
3450 if (gimple_code (stmt) != GIMPLE_COND)
3452 use_operand_p use_p;
3453 gimple *phi;
3454 edge e2;
3455 unsigned int d;
3457 lhs = gimple_assign_lhs (stmt);
3458 rhs = gimple_assign_rhs1 (stmt);
3459 gcc_assert (bb == last_bb);
3461 /* stmt is
3462 _123 = (int) _234;
3464 followed by:
3465 <bb M>:
3466 # _345 = PHI <_123(N), 1(...), 1(...)>
3468 or 0 instead of 1. If it is 0, the _234
3469 range test is anded together with all the
3470 other range tests, if it is 1, it is ored with
3471 them. */
3472 single_imm_use (lhs, &use_p, &phi);
3473 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3474 e2 = find_edge (first_bb, other_bb);
3475 d = e2->dest_idx;
3476 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3477 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3478 code = BIT_AND_EXPR;
3479 else
3481 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3482 code = BIT_IOR_EXPR;
3485 /* If _234 SSA_NAME_DEF_STMT is
3486 _234 = _567 | _789;
3487 (or &, corresponding to 1/0 in the phi arguments),
3488 push into ops the individual range test arguments
3489 of the bitwise or resp. and, recursively. */
3490 if (!get_ops (rhs, code, &ops,
3491 loop_containing_stmt (stmt))
3492 && has_single_use (rhs))
3494 /* Otherwise, push the _234 range test itself. */
3495 operand_entry *oe = operand_entry_pool.allocate ();
3497 oe->op = rhs;
3498 oe->rank = code;
3499 oe->id = 0;
3500 oe->count = 1;
3501 oe->stmt_to_insert = NULL;
3502 ops.safe_push (oe);
3503 bb_ent.last_idx++;
3505 else
3506 bb_ent.last_idx = ops.length ();
3507 bb_ent.op = rhs;
3508 bbinfo.safe_push (bb_ent);
3509 continue;
3511 /* Otherwise stmt is GIMPLE_COND. */
3512 code = gimple_cond_code (stmt);
3513 lhs = gimple_cond_lhs (stmt);
3514 rhs = gimple_cond_rhs (stmt);
3515 if (TREE_CODE (lhs) == SSA_NAME
3516 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3517 && ((code != EQ_EXPR && code != NE_EXPR)
3518 || rhs != boolean_false_node
3519 /* Either push into ops the individual bitwise
3520 or resp. and operands, depending on which
3521 edge is other_bb. */
3522 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3523 ^ (code == EQ_EXPR))
3524 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3525 loop_containing_stmt (stmt))))
3527 /* Or push the GIMPLE_COND stmt itself. */
3528 operand_entry *oe = operand_entry_pool.allocate ();
3530 oe->op = NULL;
3531 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3532 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3533 /* oe->op = NULL signals that there is no SSA_NAME
3534 for the range test, and oe->id instead is the
3535 basic block number, at whose end the GIMPLE_COND
3536 is. */
3537 oe->id = bb->index;
3538 oe->count = 1;
3539 oe->stmt_to_insert = NULL;
3540 ops.safe_push (oe);
3541 bb_ent.op = NULL;
3542 bb_ent.last_idx++;
3544 else if (ops.length () > bb_ent.first_idx)
3546 bb_ent.op = lhs;
3547 bb_ent.last_idx = ops.length ();
3549 bbinfo.safe_push (bb_ent);
3550 if (bb == first_bb)
3551 break;
3553 if (ops.length () > 1)
3554 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3555 if (any_changes)
3557 unsigned int idx, max_idx = 0;
3558 /* update_ops relies on has_single_use predicates returning the
3559 same values as they did during get_ops earlier. Additionally it
3560 never removes statements, only adds new ones and it should walk
3561 from the single imm use and check the predicate already before
3562 making those changes.
3563 On the other hand, the handling of GIMPLE_COND directly can turn
3564 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3565 it needs to be done in a separate loop afterwards. */
3566 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3568 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3569 && bbinfo[idx].op != NULL_TREE)
3571 tree new_op;
3573 max_idx = idx;
3574 stmt = last_stmt (bb);
3575 new_op = update_ops (bbinfo[idx].op,
3576 (enum tree_code)
3577 ops[bbinfo[idx].first_idx]->rank,
3578 ops, &bbinfo[idx].first_idx,
3579 loop_containing_stmt (stmt));
3580 if (new_op == NULL_TREE)
3582 gcc_assert (bb == last_bb);
3583 new_op = ops[bbinfo[idx].first_idx++]->op;
3585 if (bbinfo[idx].op != new_op)
3587 imm_use_iterator iter;
3588 use_operand_p use_p;
3589 gimple *use_stmt, *cast_stmt = NULL;
3591 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3592 if (is_gimple_debug (use_stmt))
3593 continue;
3594 else if (gimple_code (use_stmt) == GIMPLE_COND
3595 || gimple_code (use_stmt) == GIMPLE_PHI)
3596 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3597 SET_USE (use_p, new_op);
3598 else if (gimple_assign_cast_p (use_stmt))
3599 cast_stmt = use_stmt;
3600 else
3601 gcc_unreachable ();
3602 if (cast_stmt)
3604 gcc_assert (bb == last_bb);
3605 tree lhs = gimple_assign_lhs (cast_stmt);
3606 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3607 enum tree_code rhs_code
3608 = gimple_assign_rhs_code (cast_stmt);
3609 gassign *g;
3610 if (is_gimple_min_invariant (new_op))
3612 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3613 g = gimple_build_assign (new_lhs, new_op);
3615 else
3616 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3617 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3618 gimple_set_uid (g, gimple_uid (cast_stmt));
3619 gimple_set_visited (g, true);
3620 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3621 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3622 if (is_gimple_debug (use_stmt))
3623 continue;
3624 else if (gimple_code (use_stmt) == GIMPLE_COND
3625 || gimple_code (use_stmt) == GIMPLE_PHI)
3626 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3627 SET_USE (use_p, new_lhs);
3628 else
3629 gcc_unreachable ();
3633 if (bb == first_bb)
3634 break;
3636 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3638 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3639 && bbinfo[idx].op == NULL_TREE
3640 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3642 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3644 if (idx > max_idx)
3645 max_idx = idx;
3647 /* If we collapse the conditional to a true/false
3648 condition, then bubble that knowledge up to our caller. */
3649 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3651 gimple_cond_make_false (cond_stmt);
3652 cfg_cleanup_needed = true;
3654 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3656 gimple_cond_make_true (cond_stmt);
3657 cfg_cleanup_needed = true;
3659 else
3661 gimple_cond_set_code (cond_stmt, NE_EXPR);
3662 gimple_cond_set_lhs (cond_stmt,
3663 ops[bbinfo[idx].first_idx]->op);
3664 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3666 update_stmt (cond_stmt);
3668 if (bb == first_bb)
3669 break;
3672 /* The above changes could result in basic blocks after the first
3673 modified one, up to and including last_bb, being executed even if
3674 they would not be in the original program. If the value ranges of
3675 assignment lhs' in those bbs were dependent on the conditions
3676 guarding those basic blocks which now can change, the VRs might
3677 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3678 are only used within the same bb, it should not be a big deal if
3679 we just reset all the VRs in those bbs. See PR68671. */
3680 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3681 reset_flow_sensitive_info_in_bb (bb);
3683 return cfg_cleanup_needed;
3686 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3687 of STMT in its operands. This is also known as a "destructive
3688 update" operation. */
3690 static bool
3691 is_phi_for_stmt (gimple *stmt, tree operand)
3693 gimple *def_stmt;
3694 gphi *def_phi;
3695 tree lhs;
3696 use_operand_p arg_p;
3697 ssa_op_iter i;
3699 if (TREE_CODE (operand) != SSA_NAME)
3700 return false;
3702 lhs = gimple_assign_lhs (stmt);
3704 def_stmt = SSA_NAME_DEF_STMT (operand);
3705 def_phi = dyn_cast <gphi *> (def_stmt);
3706 if (!def_phi)
3707 return false;
3709 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3710 if (lhs == USE_FROM_PTR (arg_p))
3711 return true;
3712 return false;
3715 /* Remove def stmt of VAR if VAR has zero uses and recurse
3716 on rhs1 operand if so. */
3718 static void
3719 remove_visited_stmt_chain (tree var)
3721 gimple *stmt;
3722 gimple_stmt_iterator gsi;
3724 while (1)
3726 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3727 return;
3728 stmt = SSA_NAME_DEF_STMT (var);
3729 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3731 var = gimple_assign_rhs1 (stmt);
3732 gsi = gsi_for_stmt (stmt);
3733 reassoc_remove_stmt (&gsi);
3734 release_defs (stmt);
3736 else
3737 return;
3741 /* This function checks three consecutive operands in
3742 the passed operand vector OPS starting from OPINDEX and
3743 swaps two operands if it is profitable for the binary operation
3744 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3746 We pair ops with the same rank if possible.
3748 The alternative we try is to see if STMT is a destructive
3749 update style statement, which is like:
3750 b = phi (a, ...)
3751 a = c + b;
3752 In that case, we want to use the destructive update form to
3753 expose the possible vectorizer sum reduction opportunity.
3754 In that case, the third operand will be the phi node. This
3755 check is not performed if STMT is null.
3757 We could, of course, try to be better as noted above, and do a
3758 lot of work to try to find these opportunities in >3 operand
3759 cases, but it is unlikely to be worth it. */
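/* For illustration (hypothetical ranks): if the three operands have
   ranks {4, 4, 2}, the first and third entries are swapped so that the
   two rank-4 operands become the pair consumed by one binary operation,
   giving ranks {2, 4, 4}.  */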
3761 static void
3762 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3763 unsigned int opindex, gimple *stmt)
3765 operand_entry *oe1, *oe2, *oe3;
3767 oe1 = ops[opindex];
3768 oe2 = ops[opindex + 1];
3769 oe3 = ops[opindex + 2];
3771 if ((oe1->rank == oe2->rank
3772 && oe2->rank != oe3->rank)
3773 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3774 && !is_phi_for_stmt (stmt, oe1->op)
3775 && !is_phi_for_stmt (stmt, oe2->op)))
3776 std::swap (*oe1, *oe3);
3777 else if ((oe1->rank == oe3->rank
3778 && oe2->rank != oe3->rank)
3779 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3780 && !is_phi_for_stmt (stmt, oe1->op)
3781 && !is_phi_for_stmt (stmt, oe3->op)))
3782 std::swap (*oe1, *oe2);
3785 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3786 two definitions, otherwise return STMT. */
3788 static inline gimple *
3789 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3791 if (TREE_CODE (rhs1) == SSA_NAME
3792 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3793 stmt = SSA_NAME_DEF_STMT (rhs1);
3794 if (TREE_CODE (rhs2) == SSA_NAME
3795 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3796 stmt = SSA_NAME_DEF_STMT (rhs2);
3797 return stmt;
3800 /* If the stmt that defines operand has to be inserted, insert it
3801 before the use. */
3802 static void
3803 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
3805 gcc_assert (is_gimple_assign (stmt_to_insert));
3806 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
3807 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
3808 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
3809 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
3810 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
3812 /* If the insert point is not stmt, then insert_point would be
3813 the point where operand rhs1 or rhs2 is defined. In this case,
3814 stmt_to_insert has to be inserted afterwards. This would
3815 only happen when the stmt insertion point is flexible. */
3816 if (stmt == insert_point)
3817 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
3818 else
3819 insert_stmt_after (stmt_to_insert, insert_point);
3823 /* Recursively rewrite our linearized statements so that the operators
3824 match those in OPS[OPINDEX], putting the computation in rank
3825 order. Return new lhs. */
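/* For illustration (hypothetical operands): with OPS == {o0, o1, o2}
   and the linearized chain
     _5 = x_1 + y_2;
     _6 = _5 + z_3;
   the recursion rewrites _5 to compute  o1 + o2  and _6 to add o0 to
   that result, i.e. the innermost statement receives the last two
   entries of OPS and each outer statement the next earlier entry.  */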
3827 static tree
3828 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3829 vec<operand_entry *> ops, bool changed)
3831 tree rhs1 = gimple_assign_rhs1 (stmt);
3832 tree rhs2 = gimple_assign_rhs2 (stmt);
3833 tree lhs = gimple_assign_lhs (stmt);
3834 operand_entry *oe;
3836 /* The final recursion case for this function is that you have
3837 exactly two operations left.
3838 If we had exactly one op in the entire list to start with, we
3839 would have never called this function, and the tail recursion
3840 rewrites them one at a time. */
3841 if (opindex + 2 == ops.length ())
3843 operand_entry *oe1, *oe2;
3845 oe1 = ops[opindex];
3846 oe2 = ops[opindex + 1];
3848 if (rhs1 != oe1->op || rhs2 != oe2->op)
3850 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3851 unsigned int uid = gimple_uid (stmt);
3853 if (dump_file && (dump_flags & TDF_DETAILS))
3855 fprintf (dump_file, "Transforming ");
3856 print_gimple_stmt (dump_file, stmt, 0, 0);
3859 /* If the stmt that defines operand has to be inserted, insert it
3860 before the use. */
3861 if (oe1->stmt_to_insert)
3862 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3863 if (oe2->stmt_to_insert)
3864 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3865 /* Even when changed is false, reassociation could have e.g. removed
3866 some redundant operations, so unless we are just swapping the
3867 arguments or unless there is no change at all (then we just
3868 return lhs), force creation of a new SSA_NAME. */
3869 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3871 gimple *insert_point
3872 = find_insert_point (stmt, oe1->op, oe2->op);
3873 lhs = make_ssa_name (TREE_TYPE (lhs));
3874 stmt
3875 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3876 oe1->op, oe2->op);
3877 gimple_set_uid (stmt, uid);
3878 gimple_set_visited (stmt, true);
3879 if (insert_point == gsi_stmt (gsi))
3880 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3881 else
3882 insert_stmt_after (stmt, insert_point);
3884 else
3886 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3887 == stmt);
3888 gimple_assign_set_rhs1 (stmt, oe1->op);
3889 gimple_assign_set_rhs2 (stmt, oe2->op);
3890 update_stmt (stmt);
3893 if (rhs1 != oe1->op && rhs1 != oe2->op)
3894 remove_visited_stmt_chain (rhs1);
3896 if (dump_file && (dump_flags & TDF_DETAILS))
3898 fprintf (dump_file, " into ");
3899 print_gimple_stmt (dump_file, stmt, 0, 0);
3902 return lhs;
3905 /* If we hit here, we should have 3 or more ops left. */
3906 gcc_assert (opindex + 2 < ops.length ());
3908 /* Rewrite the next operator. */
3909 oe = ops[opindex];
3911 /* If the stmt that defines operand has to be inserted, insert it
3912 before the use. */
3913 if (oe->stmt_to_insert)
3914 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3916 /* Recurse on the LHS of the binary operator, which is guaranteed to
3917 be the non-leaf side. */
3918 tree new_rhs1
3919 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3920 changed || oe->op != rhs2);
3922 if (oe->op != rhs2 || new_rhs1 != rhs1)
3924 if (dump_file && (dump_flags & TDF_DETAILS))
3926 fprintf (dump_file, "Transforming ");
3927 print_gimple_stmt (dump_file, stmt, 0, 0);
3930 /* If changed is false, this is either opindex == 0
3931 or all outer rhs2's were equal to corresponding oe->op,
3932 and powi_result is NULL.
3933 That means lhs is equivalent before and after reassociation.
3934 Otherwise ensure the old lhs SSA_NAME is not reused and
3935 create a new stmt as well, so that any debug stmts will be
3936 properly adjusted. */
3937 if (changed)
3939 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3940 unsigned int uid = gimple_uid (stmt);
3941 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3943 lhs = make_ssa_name (TREE_TYPE (lhs));
3944 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3945 new_rhs1, oe->op);
3946 gimple_set_uid (stmt, uid);
3947 gimple_set_visited (stmt, true);
3948 if (insert_point == gsi_stmt (gsi))
3949 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3950 else
3951 insert_stmt_after (stmt, insert_point);
3953 else
3955 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3956 == stmt);
3957 gimple_assign_set_rhs1 (stmt, new_rhs1);
3958 gimple_assign_set_rhs2 (stmt, oe->op);
3959 update_stmt (stmt);
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3964 fprintf (dump_file, " into ");
3965 print_gimple_stmt (dump_file, stmt, 0, 0);
3968 return lhs;
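/* Worked example (added for illustration, not in the original source):
   assume the linearized chain is t1 = x + y; t2 = t1 + z, STMT is t2's
   definition, and OPS holds three entries o0, o1, o2.  The recursion
   above rewrites the chain, roughly, to

       t1 = o1 + o2;
       t2 = t1 + o0;

   ignoring the bookkeeping that may force fresh SSA names or insert
   stmt_to_insert statements before their uses.  */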
3971 /* Find out how many cycles we need to compute a chain of statements.
3972 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3973 maximum number of independent statements we may execute per cycle. */
3975 static int
3976 get_required_cycles (int ops_num, int cpu_width)
3978 int res;
3979 int elog;
3980 unsigned int rest;
3982 /* While we have more than 2 * cpu_width operands
3983 we may reduce the number of operands by cpu_width
3984 per cycle. */
3985 res = ops_num / (2 * cpu_width);
3987 /* The remaining operand count may be halved each cycle
3988 until we have only one operand left. */
3989 rest = (unsigned)(ops_num - res * cpu_width);
3990 elog = exact_log2 (rest);
3991 if (elog >= 0)
3992 res += elog;
3993 else
3994 res += floor_log2 (rest) + 1;
3996 return res;
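/* Illustration (added, not in the original source): for ops_num == 10 and
   cpu_width == 2 the code above yields res = 10 / 4 = 2 wide reduction
   cycles, leaving rest = 10 - 2 * 2 = 6 operands; 6 is not a power of two,
   so another floor_log2 (6) + 1 == 3 halving cycles are added, giving 5
   cycles in total.  */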
3999 /* Returns an optimal number of registers to use for computation of
4000 given statements. */
4002 static int
4003 get_reassociation_width (int ops_num, enum tree_code opc,
4004 machine_mode mode)
4006 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4007 int width;
4008 int width_min;
4009 int cycles_best;
4011 if (param_width > 0)
4012 width = param_width;
4013 else
4014 width = targetm.sched.reassociation_width (opc, mode);
4016 if (width == 1)
4017 return width;
4019 /* Get the minimal time required for sequence computation. */
4020 cycles_best = get_required_cycles (ops_num, width);
4022 /* Check if we may use less width and still compute sequence for
4023 the same time. It will allow us to reduce registers usage.
4024 get_required_cycles is monotonically increasing with lower width
4025 so we can perform a binary search for the minimal width that still
4026 results in the optimal cycle count. */
4027 width_min = 1;
4028 while (width > width_min)
4030 int width_mid = (width + width_min) / 2;
4032 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4033 width = width_mid;
4034 else if (width_min < width_mid)
4035 width_min = width_mid;
4036 else
4037 break;
4040 return width;
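/* Illustration (added, not in the original source): with ops_num == 4 and a
   target-reported width of 4, get_required_cycles (4, 4) == 2.  The binary
   search first tries width_mid == 2, which also needs only 2 cycles, and
   then width_mid == 1, which needs 3, so the function settles on width 2:
   the same latency with fewer registers live at once.  */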
4043 /* Recursively rewrite our linearized statements so that the operators
4044 match those in OPS[OPINDEX], putting the computation in rank
4045 order and trying to allow operations to be executed in
4046 parallel. */
4048 static void
4049 rewrite_expr_tree_parallel (gassign *stmt, int width,
4050 vec<operand_entry *> ops)
4052 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4053 int op_num = ops.length ();
4054 int stmt_num = op_num - 1;
4055 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4056 int op_index = op_num - 1;
4057 int stmt_index = 0;
4058 int ready_stmts_end = 0;
4059 int i = 0;
4060 gimple *stmt1 = NULL, *stmt2 = NULL;
4061 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4063 /* We start expression rewriting from the top statements.
4064 So, in this loop we create a full list of statements
4065 we will work with. */
4066 stmts[stmt_num - 1] = stmt;
4067 for (i = stmt_num - 2; i >= 0; i--)
4068 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4070 for (i = 0; i < stmt_num; i++)
4072 tree op1, op2;
4074 /* Determine whether we should use results of
4075 already handled statements or not. */
4076 if (ready_stmts_end == 0
4077 && (i - stmt_index >= width || op_index < 1))
4078 ready_stmts_end = i;
4080 /* Now we choose operands for the next statement. A nonzero
4081 value in ready_stmts_end means that we should use
4082 the results of already generated statements as new operands. */
4083 if (ready_stmts_end > 0)
4085 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4086 if (ready_stmts_end > stmt_index)
4087 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4088 else if (op_index >= 0)
4090 operand_entry *oe = ops[op_index--];
4091 stmt2 = oe->stmt_to_insert;
4092 op2 = oe->op;
4094 else
4096 gcc_assert (stmt_index < i);
4097 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4100 if (stmt_index >= ready_stmts_end)
4101 ready_stmts_end = 0;
4103 else
4105 if (op_index > 1)
4106 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4107 operand_entry *oe2 = ops[op_index--];
4108 operand_entry *oe1 = ops[op_index--];
4109 op2 = oe2->op;
4110 stmt2 = oe2->stmt_to_insert;
4111 op1 = oe1->op;
4112 stmt1 = oe1->stmt_to_insert;
4115 /* If we emit the last statement then we should put
4116 operands into the last statement. It will also
4117 break the loop. */
4118 if (op_index < 0 && stmt_index == i)
4119 i = stmt_num - 1;
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4123 fprintf (dump_file, "Transforming ");
4124 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4127 /* If the stmt that defines operand has to be inserted, insert it
4128 before the use. */
4129 if (stmt1)
4130 insert_stmt_before_use (stmts[i], stmt1);
4131 if (stmt2)
4132 insert_stmt_before_use (stmts[i], stmt2);
4133 stmt1 = stmt2 = NULL;
4135 /* We keep the original statement only for the last one. All
4136 others are recreated. */
4137 if (i == stmt_num - 1)
4139 gimple_assign_set_rhs1 (stmts[i], op1);
4140 gimple_assign_set_rhs2 (stmts[i], op2);
4141 update_stmt (stmts[i]);
4143 else
4145 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4147 if (dump_file && (dump_flags & TDF_DETAILS))
4149 fprintf (dump_file, " into ");
4150 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4154 remove_visited_stmt_chain (last_rhs1);
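/* Illustration (added, not in the original source): with four operands
   o0..o3 and width == 2, the serial chain is rebuilt roughly as

       _1 = o2 + o3;
       _2 = o0 + o1;
       lhs = _1 + _2;

   so the first two additions are independent and can issue in the same
   cycle; swap_ops_for_binary_stmt may still reorder operands within each
   pair.  */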
4157 /* Transform STMT, which is really (A + B) + (C + D), into the left
4158 linear form, ((A+B)+C)+D.
4159 Recurse on D if necessary. */
4161 static void
4162 linearize_expr (gimple *stmt)
4164 gimple_stmt_iterator gsi;
4165 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4166 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4167 gimple *oldbinrhs = binrhs;
4168 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4169 gimple *newbinrhs = NULL;
4170 struct loop *loop = loop_containing_stmt (stmt);
4171 tree lhs = gimple_assign_lhs (stmt);
4173 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4174 && is_reassociable_op (binrhs, rhscode, loop));
4176 gsi = gsi_for_stmt (stmt);
4178 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4179 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4180 gimple_assign_rhs_code (binrhs),
4181 gimple_assign_lhs (binlhs),
4182 gimple_assign_rhs2 (binrhs));
4183 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4184 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4185 gimple_set_uid (binrhs, gimple_uid (stmt));
4187 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4188 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4190 if (dump_file && (dump_flags & TDF_DETAILS))
4192 fprintf (dump_file, "Linearized: ");
4193 print_gimple_stmt (dump_file, stmt, 0, 0);
4196 reassociate_stats.linearized++;
4197 update_stmt (stmt);
4199 gsi = gsi_for_stmt (oldbinrhs);
4200 reassoc_remove_stmt (&gsi);
4201 release_defs (oldbinrhs);
4203 gimple_set_visited (stmt, true);
4204 gimple_set_visited (binlhs, true);
4205 gimple_set_visited (binrhs, true);
4207 /* Tail recurse on the new rhs if it still needs reassociation. */
4208 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4209 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4210 want to change the algorithm while converting to tuples. */
4211 linearize_expr (stmt);
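/* Illustration (added, not in the original source): for s1 = A + B;
   s2 = C + D; e = s1 + s2, the code above inserts t = s1 + D and rewrites
   the root to e = t + C, leaving a left-linear chain.  Which of C and D
   stays in the root is immaterial for the later reassociation steps.  */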
4214 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4215 it. Otherwise, return NULL. */
4217 static gimple *
4218 get_single_immediate_use (tree lhs)
4220 use_operand_p immuse;
4221 gimple *immusestmt;
4223 if (TREE_CODE (lhs) == SSA_NAME
4224 && single_imm_use (lhs, &immuse, &immusestmt)
4225 && is_gimple_assign (immusestmt))
4226 return immusestmt;
4228 return NULL;
4231 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4232 representing the negated value. Insertions of any necessary
4233 instructions go before GSI.
4234 This function is recursive in that, if you hand it "a_5" as the
4235 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4236 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4238 static tree
4239 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4241 gimple *negatedefstmt = NULL;
4242 tree resultofnegate;
4243 gimple_stmt_iterator gsi;
4244 unsigned int uid;
4246 /* If we are trying to negate a name, defined by an add, negate the
4247 add operands instead. */
4248 if (TREE_CODE (tonegate) == SSA_NAME)
4249 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4250 if (TREE_CODE (tonegate) == SSA_NAME
4251 && is_gimple_assign (negatedefstmt)
4252 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4253 && has_single_use (gimple_assign_lhs (negatedefstmt))
4254 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4256 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4257 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4258 tree lhs = gimple_assign_lhs (negatedefstmt);
4259 gimple *g;
4261 gsi = gsi_for_stmt (negatedefstmt);
4262 rhs1 = negate_value (rhs1, &gsi);
4264 gsi = gsi_for_stmt (negatedefstmt);
4265 rhs2 = negate_value (rhs2, &gsi);
4267 gsi = gsi_for_stmt (negatedefstmt);
4268 lhs = make_ssa_name (TREE_TYPE (lhs));
4269 gimple_set_visited (negatedefstmt, true);
4270 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4271 gimple_set_uid (g, gimple_uid (negatedefstmt));
4272 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4273 return lhs;
4276 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4277 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4278 NULL_TREE, true, GSI_SAME_STMT);
4279 gsi = *gsip;
4280 uid = gimple_uid (gsi_stmt (gsi));
4281 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4283 gimple *stmt = gsi_stmt (gsi);
4284 if (gimple_uid (stmt) != 0)
4285 break;
4286 gimple_set_uid (stmt, uid);
4288 return resultofnegate;
4291 /* Return true if we should break up the subtract in STMT into an add
4292 with negate. This is true when the subtract operands are really
4293 adds, or the subtract itself is used in an add expression. In
4294 either case, breaking up the subtract into an add with negate
4295 exposes the adds to reassociation. */
4297 static bool
4298 should_break_up_subtract (gimple *stmt)
4300 tree lhs = gimple_assign_lhs (stmt);
4301 tree binlhs = gimple_assign_rhs1 (stmt);
4302 tree binrhs = gimple_assign_rhs2 (stmt);
4303 gimple *immusestmt;
4304 struct loop *loop = loop_containing_stmt (stmt);
4306 if (TREE_CODE (binlhs) == SSA_NAME
4307 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4308 return true;
4310 if (TREE_CODE (binrhs) == SSA_NAME
4311 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4312 return true;
4314 if (TREE_CODE (lhs) == SSA_NAME
4315 && (immusestmt = get_single_immediate_use (lhs))
4316 && is_gimple_assign (immusestmt)
4317 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4318 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4319 return true;
4320 return false;
4323 /* Transform STMT from A - B into A + -B. */
4325 static void
4326 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4328 tree rhs1 = gimple_assign_rhs1 (stmt);
4329 tree rhs2 = gimple_assign_rhs2 (stmt);
4331 if (dump_file && (dump_flags & TDF_DETAILS))
4333 fprintf (dump_file, "Breaking up subtract ");
4334 print_gimple_stmt (dump_file, stmt, 0, 0);
4337 rhs2 = negate_value (rhs2, gsip);
4338 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4339 update_stmt (stmt);
4342 /* Determine whether STMT is a builtin call that raises an SSA name
4343 to an integer power and has only one use. If so, and this is early
4344 reassociation and unsafe math optimizations are permitted, place
4345 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4346 If any of these conditions does not hold, return FALSE. */
4348 static bool
4349 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4351 tree arg1;
4352 REAL_VALUE_TYPE c, cint;
4354 switch (gimple_call_combined_fn (stmt))
4356 CASE_CFN_POW:
4357 if (flag_errno_math)
4358 return false;
4360 *base = gimple_call_arg (stmt, 0);
4361 arg1 = gimple_call_arg (stmt, 1);
4363 if (TREE_CODE (arg1) != REAL_CST)
4364 return false;
4366 c = TREE_REAL_CST (arg1);
4368 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4369 return false;
4371 *exponent = real_to_integer (&c);
4372 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4373 if (!real_identical (&c, &cint))
4374 return false;
4376 break;
4378 CASE_CFN_POWI:
4379 *base = gimple_call_arg (stmt, 0);
4380 arg1 = gimple_call_arg (stmt, 1);
4382 if (!tree_fits_shwi_p (arg1))
4383 return false;
4385 *exponent = tree_to_shwi (arg1);
4386 break;
4388 default:
4389 return false;
4392 /* Expanding negative exponents is generally unproductive, so we don't
4393 complicate matters with those. Exponents of zero and one should
4394 have been handled by expression folding. */
4395 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4396 return false;
4398 return true;
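/* Examples (added for illustration): pow (x, 4.0) is accepted with
   *BASE == x and *EXPONENT == 4, but only when -fmath-errno is disabled;
   __builtin_powi (x, 3) is accepted with exponent 3; pow (x, 2.5),
   exponents below 2, and non-SSA bases are all rejected.  */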
4401 /* Try to derive and add operand entry for OP to *OPS. Return false if
4402 unsuccessful. */
4404 static bool
4405 try_special_add_to_ops (vec<operand_entry *> *ops,
4406 enum tree_code code,
4407 tree op, gimple* def_stmt)
4409 tree base = NULL_TREE;
4410 HOST_WIDE_INT exponent = 0;
4412 if (TREE_CODE (op) != SSA_NAME
4413 || ! has_single_use (op))
4414 return false;
4416 if (code == MULT_EXPR
4417 && reassoc_insert_powi_p
4418 && flag_unsafe_math_optimizations
4419 && is_gimple_call (def_stmt)
4420 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4422 add_repeat_to_ops_vec (ops, base, exponent);
4423 gimple_set_visited (def_stmt, true);
4424 return true;
4426 else if (code == MULT_EXPR
4427 && is_gimple_assign (def_stmt)
4428 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4429 && !HONOR_SNANS (TREE_TYPE (op))
4430 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4431 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4433 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4434 tree cst = build_minus_one_cst (TREE_TYPE (op));
4435 add_to_ops_vec (ops, rhs1);
4436 add_to_ops_vec (ops, cst);
4437 gimple_set_visited (def_stmt, true);
4438 return true;
4441 return false;
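/* Examples (added for illustration): in a multiplication chain, a
   single-use operand defined by t = __builtin_powi (x, 3) is recorded as
   the repeated factor (x, 3) rather than as a plain operand, and
   t = -y is split into the two operands y and -1.0 when that is safe for
   the type.  In all other cases the caller just adds OP itself.  */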
4444 /* Recursively linearize a binary expression that is the RHS of STMT.
4445 Place the operands of the expression tree in the vector named OPS. */
4447 static void
4448 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4449 bool is_associative, bool set_visited)
4451 tree binlhs = gimple_assign_rhs1 (stmt);
4452 tree binrhs = gimple_assign_rhs2 (stmt);
4453 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4454 bool binlhsisreassoc = false;
4455 bool binrhsisreassoc = false;
4456 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4457 struct loop *loop = loop_containing_stmt (stmt);
4459 if (set_visited)
4460 gimple_set_visited (stmt, true);
4462 if (TREE_CODE (binlhs) == SSA_NAME)
4464 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4465 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4466 && !stmt_could_throw_p (binlhsdef));
4469 if (TREE_CODE (binrhs) == SSA_NAME)
4471 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4472 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4473 && !stmt_could_throw_p (binrhsdef));
4476 /* If the LHS is not reassociable, but the RHS is, we need to swap
4477 them. If neither is reassociable, there is nothing we can do, so
4478 just put them in the ops vector. If the LHS is reassociable,
4479 linearize it. If both are reassociable, then linearize the RHS
4480 and the LHS. */
4482 if (!binlhsisreassoc)
4484 /* If this is not an associative operation, like division, give up. */
4485 if (!is_associative)
4487 add_to_ops_vec (ops, binrhs);
4488 return;
4491 if (!binrhsisreassoc)
4493 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4494 add_to_ops_vec (ops, binrhs);
4496 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4497 add_to_ops_vec (ops, binlhs);
4499 return;
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4504 fprintf (dump_file, "swapping operands of ");
4505 print_gimple_stmt (dump_file, stmt, 0, 0);
4508 swap_ssa_operands (stmt,
4509 gimple_assign_rhs1_ptr (stmt),
4510 gimple_assign_rhs2_ptr (stmt));
4511 update_stmt (stmt);
4513 if (dump_file && (dump_flags & TDF_DETAILS))
4515 fprintf (dump_file, " is now ");
4516 print_gimple_stmt (dump_file, stmt, 0, 0);
4519 /* We want to make it so the lhs is always the reassociative op,
4520 so swap. */
4521 std::swap (binlhs, binrhs);
4523 else if (binrhsisreassoc)
4525 linearize_expr (stmt);
4526 binlhs = gimple_assign_rhs1 (stmt);
4527 binrhs = gimple_assign_rhs2 (stmt);
4530 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4531 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4532 rhscode, loop));
4533 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4534 is_associative, set_visited);
4536 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4537 add_to_ops_vec (ops, binrhs);
4540 /* Repropagate the negates back into subtracts, since no other pass
4541 currently does it. */
4543 static void
4544 repropagate_negates (void)
4546 unsigned int i = 0;
4547 tree negate;
4549 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4551 gimple *user = get_single_immediate_use (negate);
4553 if (!user || !is_gimple_assign (user))
4554 continue;
4556 /* The negate operand can be either operand of a PLUS_EXPR
4557 (it can be the LHS if the RHS is a constant for example).
4559 Force the negate operand to the RHS of the PLUS_EXPR, then
4560 transform the PLUS_EXPR into a MINUS_EXPR. */
4561 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4563 /* If the negated operand appears on the LHS of the
4564 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4565 to force the negated operand to the RHS of the PLUS_EXPR. */
4566 if (gimple_assign_rhs1 (user) == negate)
4568 swap_ssa_operands (user,
4569 gimple_assign_rhs1_ptr (user),
4570 gimple_assign_rhs2_ptr (user));
4573 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4574 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4575 if (gimple_assign_rhs2 (user) == negate)
4577 tree rhs1 = gimple_assign_rhs1 (user);
4578 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4579 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4580 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4581 update_stmt (user);
4584 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4586 if (gimple_assign_rhs1 (user) == negate)
4588 /* We have
4589 x = -a
4590 y = x - b
4591 which we transform into
4592 x = a + b
4593 y = -x .
4594 This pushes down the negate which we possibly can merge
4595 into some other operation, hence insert it into the
4596 plus_negates vector. */
4597 gimple *feed = SSA_NAME_DEF_STMT (negate);
4598 tree a = gimple_assign_rhs1 (feed);
4599 tree b = gimple_assign_rhs2 (user);
4600 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4601 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4602 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4603 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4604 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4605 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4606 user = gsi_stmt (gsi2);
4607 update_stmt (user);
4608 reassoc_remove_stmt (&gsi);
4609 release_defs (feed);
4610 plus_negates.safe_push (gimple_assign_lhs (user));
4612 else
4614 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4615 rid of one operation. */
4616 gimple *feed = SSA_NAME_DEF_STMT (negate);
4617 tree a = gimple_assign_rhs1 (feed);
4618 tree rhs1 = gimple_assign_rhs1 (user);
4619 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4620 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4621 update_stmt (gsi_stmt (gsi));
4627 /* Returns true if OP is of a type for which we can do reassociation.
4628 That is for integral or non-saturating fixed-point types, and for
4629 floating-point types when -fassociative-math is enabled. */
4631 static bool
4632 can_reassociate_p (tree op)
4634 tree type = TREE_TYPE (op);
4635 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4636 || NON_SAT_FIXED_POINT_TYPE_P (type)
4637 || (flag_associative_math && FLOAT_TYPE_P (type)))
4638 return true;
4639 return false;
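/* Examples (added for illustration): unsigned int and signed types under
   -fwrapv qualify because their overflow wraps, plain signed int with
   undefined overflow does not, and float/double qualify only under
   -fassociative-math.  */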
4642 /* Break up subtract operations in block BB.
4644 We do this top down because we don't know whether the subtract is
4645 part of a possible chain of reassociation except at the top.
4647 IE given
4648 d = f + g
4649 c = a + e
4650 b = c - d
4651 q = b - r
4652 k = t - q
4654 we want to break up k = t - q, but we won't until we've transformed q
4655 = b - r, which won't be broken up until we transform b = c - d.
4657 En passant, clear the GIMPLE visited flag on every statement
4658 and set UIDs within each basic block. */
4660 static void
4661 break_up_subtract_bb (basic_block bb)
4663 gimple_stmt_iterator gsi;
4664 basic_block son;
4665 unsigned int uid = 1;
4667 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4669 gimple *stmt = gsi_stmt (gsi);
4670 gimple_set_visited (stmt, false);
4671 gimple_set_uid (stmt, uid++);
4673 if (!is_gimple_assign (stmt)
4674 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4675 continue;
4677 /* Look for simple gimple subtract operations. */
4678 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4680 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4681 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4682 continue;
4684 /* Check for a subtract used only in an addition. If this
4685 is the case, transform it into add of a negate for better
4686 reassociation. IE transform C = A-B into C = A + -B if C
4687 is only used in an addition. */
4688 if (should_break_up_subtract (stmt))
4689 break_up_subtract (stmt, &gsi);
4691 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4692 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4693 plus_negates.safe_push (gimple_assign_lhs (stmt));
4695 for (son = first_dom_son (CDI_DOMINATORS, bb);
4696 son;
4697 son = next_dom_son (CDI_DOMINATORS, son))
4698 break_up_subtract_bb (son);
4701 /* Used for repeated factor analysis. */
4702 struct repeat_factor
4704 /* An SSA name that occurs in a multiply chain. */
4705 tree factor;
4707 /* Cached rank of the factor. */
4708 unsigned rank;
4710 /* Number of occurrences of the factor in the chain. */
4711 HOST_WIDE_INT count;
4713 /* An SSA name representing the product of this factor and
4714 all factors appearing later in the repeated factor vector. */
4715 tree repr;
4719 static vec<repeat_factor> repeat_factor_vec;
4721 /* Used for sorting the repeat factor vector. Sort primarily by
4722 ascending occurrence count, secondarily by descending rank. */
4724 static int
4725 compare_repeat_factors (const void *x1, const void *x2)
4727 const repeat_factor *rf1 = (const repeat_factor *) x1;
4728 const repeat_factor *rf2 = (const repeat_factor *) x2;
4730 if (rf1->count != rf2->count)
4731 return rf1->count - rf2->count;
4733 return rf2->rank - rf1->rank;
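/* Illustration (added): factors recorded as x (count 2), y (count 3) and
   z (count 4) sort to the order x, y, z; equal counts are ordered by
   descending rank.  */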
4736 /* Look for repeated operands in OPS in the multiply tree rooted at
4737 STMT. Replace them with an optimal sequence of multiplies and powi
4738 builtin calls, and remove the used operands from OPS. Return an
4739 SSA name representing the value of the replacement sequence. */
4741 static tree
4742 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4744 unsigned i, j, vec_len;
4745 int ii;
4746 operand_entry *oe;
4747 repeat_factor *rf1, *rf2;
4748 repeat_factor rfnew;
4749 tree result = NULL_TREE;
4750 tree target_ssa, iter_result;
4751 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4752 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4753 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4754 gimple *mul_stmt, *pow_stmt;
4756 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4757 target. */
4758 if (!powi_fndecl)
4759 return NULL_TREE;
4761 /* Allocate the repeated factor vector. */
4762 repeat_factor_vec.create (10);
4764 /* Scan the OPS vector for all SSA names in the product and build
4765 up a vector of occurrence counts for each factor. */
4766 FOR_EACH_VEC_ELT (*ops, i, oe)
4768 if (TREE_CODE (oe->op) == SSA_NAME)
4770 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4772 if (rf1->factor == oe->op)
4774 rf1->count += oe->count;
4775 break;
4779 if (j >= repeat_factor_vec.length ())
4781 rfnew.factor = oe->op;
4782 rfnew.rank = oe->rank;
4783 rfnew.count = oe->count;
4784 rfnew.repr = NULL_TREE;
4785 repeat_factor_vec.safe_push (rfnew);
4790 /* Sort the repeated factor vector by (a) increasing occurrence count,
4791 and (b) decreasing rank. */
4792 repeat_factor_vec.qsort (compare_repeat_factors);
4794 /* It is generally best to combine as many base factors as possible
4795 into a product before applying __builtin_powi to the result.
4796 However, the sort order chosen for the repeated factor vector
4797 allows us to cache partial results for the product of the base
4798 factors for subsequent use. When we already have a cached partial
4799 result from a previous iteration, it is best to make use of it
4800 before looking for another __builtin_pow opportunity.
4802 As an example, consider x * x * y * y * y * z * z * z * z.
4803 We want to first compose the product x * y * z, raise it to the
4804 second power, then multiply this by y * z, and finally multiply
4805 by z. This can be done in 5 multiplies provided we cache y * z
4806 for use in both expressions:
4808 t1 = y * z
4809 t2 = t1 * x
4810 t3 = t2 * t2
4811 t4 = t1 * t3
4812 result = t4 * z
4814 If we instead ignored the cached y * z and first multiplied by
4815 the __builtin_pow opportunity z * z, we would get the inferior:
4817 t1 = y * z
4818 t2 = t1 * x
4819 t3 = t2 * t2
4820 t4 = z * z
4821 t5 = t3 * t4
4822 result = t5 * y */
4824 vec_len = repeat_factor_vec.length ();
4826 /* Repeatedly look for opportunities to create a builtin_powi call. */
4827 while (true)
4829 HOST_WIDE_INT power;
4831 /* First look for the largest cached product of factors from
4832 preceding iterations. If found, create a builtin_powi for
4833 it if the minimum occurrence count for its factors is at
4834 least 2, or just use this cached product as our next
4835 multiplicand if the minimum occurrence count is 1. */
4836 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4838 if (rf1->repr && rf1->count > 0)
4839 break;
4842 if (j < vec_len)
4844 power = rf1->count;
4846 if (power == 1)
4848 iter_result = rf1->repr;
4850 if (dump_file && (dump_flags & TDF_DETAILS))
4852 unsigned elt;
4853 repeat_factor *rf;
4854 fputs ("Multiplying by cached product ", dump_file);
4855 for (elt = j; elt < vec_len; elt++)
4857 rf = &repeat_factor_vec[elt];
4858 print_generic_expr (dump_file, rf->factor, 0);
4859 if (elt < vec_len - 1)
4860 fputs (" * ", dump_file);
4862 fputs ("\n", dump_file);
4865 else
4867 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4868 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4869 build_int_cst (integer_type_node,
4870 power));
4871 gimple_call_set_lhs (pow_stmt, iter_result);
4872 gimple_set_location (pow_stmt, gimple_location (stmt));
4873 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4874 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4876 if (dump_file && (dump_flags & TDF_DETAILS))
4878 unsigned elt;
4879 repeat_factor *rf;
4880 fputs ("Building __builtin_pow call for cached product (",
4881 dump_file);
4882 for (elt = j; elt < vec_len; elt++)
4884 rf = &repeat_factor_vec[elt];
4885 print_generic_expr (dump_file, rf->factor, 0);
4886 if (elt < vec_len - 1)
4887 fputs (" * ", dump_file);
4889 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4890 power);
4894 else
4896 /* Otherwise, find the first factor in the repeated factor
4897 vector whose occurrence count is at least 2. If no such
4898 factor exists, there are no builtin_powi opportunities
4899 remaining. */
4900 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4902 if (rf1->count >= 2)
4903 break;
4906 if (j >= vec_len)
4907 break;
4909 power = rf1->count;
4911 if (dump_file && (dump_flags & TDF_DETAILS))
4913 unsigned elt;
4914 repeat_factor *rf;
4915 fputs ("Building __builtin_pow call for (", dump_file);
4916 for (elt = j; elt < vec_len; elt++)
4918 rf = &repeat_factor_vec[elt];
4919 print_generic_expr (dump_file, rf->factor, 0);
4920 if (elt < vec_len - 1)
4921 fputs (" * ", dump_file);
4923 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4926 reassociate_stats.pows_created++;
4928 /* Visit each element of the vector in reverse order (so that
4929 high-occurrence elements are visited first, and within the
4930 same occurrence count, lower-ranked elements are visited
4931 first). Form a linear product of all elements in this order
4932 whose occurrence count is at least that of element J.
4933 Record the SSA name representing the product of each element
4934 with all subsequent elements in the vector. */
4935 if (j == vec_len - 1)
4936 rf1->repr = rf1->factor;
4937 else
4939 for (ii = vec_len - 2; ii >= (int)j; ii--)
4941 tree op1, op2;
4943 rf1 = &repeat_factor_vec[ii];
4944 rf2 = &repeat_factor_vec[ii + 1];
4946 /* Init the last factor's representative to be itself. */
4947 if (!rf2->repr)
4948 rf2->repr = rf2->factor;
4950 op1 = rf1->factor;
4951 op2 = rf2->repr;
4953 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4954 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4955 op1, op2);
4956 gimple_set_location (mul_stmt, gimple_location (stmt));
4957 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4958 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4959 rf1->repr = target_ssa;
4961 /* Don't reprocess the multiply we just introduced. */
4962 gimple_set_visited (mul_stmt, true);
4966 /* Form a call to __builtin_powi for the maximum product
4967 just formed, raised to the power obtained earlier. */
4968 rf1 = &repeat_factor_vec[j];
4969 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4970 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4971 build_int_cst (integer_type_node,
4972 power));
4973 gimple_call_set_lhs (pow_stmt, iter_result);
4974 gimple_set_location (pow_stmt, gimple_location (stmt));
4975 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4976 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4979 /* If we previously formed at least one other builtin_powi call,
4980 form the product of this one and those others. */
4981 if (result)
4983 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4984 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4985 result, iter_result);
4986 gimple_set_location (mul_stmt, gimple_location (stmt));
4987 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4988 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4989 gimple_set_visited (mul_stmt, true);
4990 result = new_result;
4992 else
4993 result = iter_result;
4995 /* Decrement the occurrence count of each element in the product
4996 by the count found above, and remove this many copies of each
4997 factor from OPS. */
4998 for (i = j; i < vec_len; i++)
5000 unsigned k = power;
5001 unsigned n;
5003 rf1 = &repeat_factor_vec[i];
5004 rf1->count -= power;
5006 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5008 if (oe->op == rf1->factor)
5010 if (oe->count <= k)
5012 ops->ordered_remove (n);
5013 k -= oe->count;
5015 if (k == 0)
5016 break;
5018 else
5020 oe->count -= k;
5021 break;
5028 /* At this point all elements in the repeated factor vector have a
5029 remaining occurrence count of 0 or 1, and those with a count of 1
5030 don't have cached representatives. Re-sort the ops vector and
5031 clean up. */
5032 ops->qsort (sort_by_operand_rank);
5033 repeat_factor_vec.release ();
5035 /* Return the final product computed herein. Note that there may
5036 still be some elements with single occurrence count left in OPS;
5037 those will be handled by the normal reassociation logic. */
5038 return result;
5041 /* Attempt to optimize
5042 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5043 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5045 static void
5046 attempt_builtin_copysign (vec<operand_entry *> *ops)
5048 operand_entry *oe;
5049 unsigned int i;
5050 unsigned int length = ops->length ();
5051 tree cst = ops->last ()->op;
5053 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5054 return;
5056 FOR_EACH_VEC_ELT (*ops, i, oe)
5058 if (TREE_CODE (oe->op) == SSA_NAME
5059 && has_single_use (oe->op))
5061 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5062 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5064 tree arg0, arg1;
5065 switch (gimple_call_combined_fn (old_call))
5067 CASE_CFN_COPYSIGN:
5068 arg0 = gimple_call_arg (old_call, 0);
5069 arg1 = gimple_call_arg (old_call, 1);
5070 /* The first argument of copysign must be a constant,
5071 otherwise there's nothing to do. */
5072 if (TREE_CODE (arg0) == REAL_CST)
5074 tree type = TREE_TYPE (arg0);
5075 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5076 /* If we couldn't fold to a single constant, skip it.
5077 That happens e.g. for inexact multiplication when
5078 -frounding-math. */
5079 if (mul == NULL_TREE)
5080 break;
5081 /* Instead of adjusting OLD_CALL, let's build a new
5082 call to not leak the LHS and prevent keeping bogus
5083 debug statements. DCE will clean up the old call. */
5084 gcall *new_call;
5085 if (gimple_call_internal_p (old_call))
5086 new_call = gimple_build_call_internal
5087 (IFN_COPYSIGN, 2, mul, arg1);
5088 else
5089 new_call = gimple_build_call
5090 (gimple_call_fndecl (old_call), 2, mul, arg1);
5091 tree lhs = make_ssa_name (type);
5092 gimple_call_set_lhs (new_call, lhs);
5093 gimple_set_location (new_call,
5094 gimple_location (old_call));
5095 insert_stmt_after (new_call, old_call);
5096 /* We've used the constant, get rid of it. */
5097 ops->pop ();
5098 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5099 /* Handle the CST1 < 0 case by negating the result. */
5100 if (cst1_neg)
5102 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5103 gimple *negate_stmt
5104 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5105 insert_stmt_after (negate_stmt, new_call);
5106 oe->op = negrhs;
5108 else
5109 oe->op = lhs;
5110 if (dump_file && (dump_flags & TDF_DETAILS))
5112 fprintf (dump_file, "Optimizing copysign: ");
5113 print_generic_expr (dump_file, cst, 0);
5114 fprintf (dump_file, " * COPYSIGN (");
5115 print_generic_expr (dump_file, arg0, 0);
5116 fprintf (dump_file, ", ");
5117 print_generic_expr (dump_file, arg1, 0);
5118 fprintf (dump_file, ") into %sCOPYSIGN (",
5119 cst1_neg ? "-" : "");
5120 print_generic_expr (dump_file, mul, 0);
5121 fprintf (dump_file, ", ");
5122 print_generic_expr (dump_file, arg1, 0);
5123 fprintf (dump_file, ")\n");
5125 return;
5127 break;
5128 default:
5129 break;
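/* Example (added for illustration): -4.0 * copysign (2.0, y) is rewritten
   into a fresh call copysign (-8.0, y) whose result is then negated,
   because CST1 is negative; the original call is left for DCE to remove.  */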
5136 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5138 static void
5139 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5141 tree rhs1;
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5145 fprintf (dump_file, "Transforming ");
5146 print_gimple_stmt (dump_file, stmt, 0, 0);
5149 rhs1 = gimple_assign_rhs1 (stmt);
5150 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5151 update_stmt (stmt);
5152 remove_visited_stmt_chain (rhs1);
5154 if (dump_file && (dump_flags & TDF_DETAILS))
5156 fprintf (dump_file, " into ");
5157 print_gimple_stmt (dump_file, stmt, 0, 0);
5161 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5163 static void
5164 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5165 tree rhs1, tree rhs2)
5167 if (dump_file && (dump_flags & TDF_DETAILS))
5169 fprintf (dump_file, "Transforming ");
5170 print_gimple_stmt (dump_file, stmt, 0, 0);
5173 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5174 update_stmt (gsi_stmt (*gsi));
5175 remove_visited_stmt_chain (rhs1);
5177 if (dump_file && (dump_flags & TDF_DETAILS))
5179 fprintf (dump_file, " into ");
5180 print_gimple_stmt (dump_file, stmt, 0, 0);
5184 /* Reassociate expressions in basic block BB and in its children in
5185 the post-dominator tree.
5187 Bubble up return status from maybe_optimize_range_tests. */
5189 static bool
5190 reassociate_bb (basic_block bb)
5192 gimple_stmt_iterator gsi;
5193 basic_block son;
5194 gimple *stmt = last_stmt (bb);
5195 bool cfg_cleanup_needed = false;
5197 if (stmt && !gimple_visited_p (stmt))
5198 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5200 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5202 stmt = gsi_stmt (gsi);
5204 if (is_gimple_assign (stmt)
5205 && !stmt_could_throw_p (stmt))
5207 tree lhs, rhs1, rhs2;
5208 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5210 /* If this is not a gimple binary expression, there is
5211 nothing for us to do with it. */
5212 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5213 continue;
5215 /* If this was part of an already processed statement,
5216 we don't need to touch it again. */
5217 if (gimple_visited_p (stmt))
5219 /* This statement might have become dead because of previous
5220 reassociations. */
5221 if (has_zero_uses (gimple_get_lhs (stmt)))
5223 reassoc_remove_stmt (&gsi);
5224 release_defs (stmt);
5225 /* We might end up removing the last stmt above which
5226 places the iterator to the end of the sequence.
5227 Reset it to the last stmt in this case which might
5228 be the end of the sequence as well if we removed
5229 the last statement of the sequence. In which case
5230 we need to bail out. */
5231 if (gsi_end_p (gsi))
5233 gsi = gsi_last_bb (bb);
5234 if (gsi_end_p (gsi))
5235 break;
5238 continue;
5241 lhs = gimple_assign_lhs (stmt);
5242 rhs1 = gimple_assign_rhs1 (stmt);
5243 rhs2 = gimple_assign_rhs2 (stmt);
5245 /* For non-bit or min/max operations we can't associate
5246 all types. Verify that here. */
5247 if (rhs_code != BIT_IOR_EXPR
5248 && rhs_code != BIT_AND_EXPR
5249 && rhs_code != BIT_XOR_EXPR
5250 && rhs_code != MIN_EXPR
5251 && rhs_code != MAX_EXPR
5252 && (!can_reassociate_p (lhs)
5253 || !can_reassociate_p (rhs1)
5254 || !can_reassociate_p (rhs2)))
5255 continue;
5257 if (associative_tree_code (rhs_code))
5259 auto_vec<operand_entry *> ops;
5260 tree powi_result = NULL_TREE;
5261 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5263 /* There may be no immediate uses left by the time we
5264 get here because we may have eliminated them all. */
5265 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5266 continue;
5268 gimple_set_visited (stmt, true);
5269 linearize_expr_tree (&ops, stmt, true, true);
5270 ops.qsort (sort_by_operand_rank);
5271 optimize_ops_list (rhs_code, &ops);
5272 if (undistribute_ops_list (rhs_code, &ops,
5273 loop_containing_stmt (stmt)))
5275 ops.qsort (sort_by_operand_rank);
5276 optimize_ops_list (rhs_code, &ops);
5279 if (rhs_code == PLUS_EXPR
5280 && transform_add_to_multiply (&ops))
5281 ops.qsort (sort_by_operand_rank);
5283 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5285 if (is_vector)
5286 optimize_vec_cond_expr (rhs_code, &ops);
5287 else
5288 optimize_range_tests (rhs_code, &ops);
5291 if (rhs_code == MULT_EXPR && !is_vector)
5293 attempt_builtin_copysign (&ops);
5295 if (reassoc_insert_powi_p
5296 && flag_unsafe_math_optimizations)
5297 powi_result = attempt_builtin_powi (stmt, &ops);
5300 operand_entry *last;
5301 bool negate_result = false;
5302 if (ops.length () > 1
5303 && rhs_code == MULT_EXPR)
5305 last = ops.last ();
5306 if ((integer_minus_onep (last->op)
5307 || real_minus_onep (last->op))
5308 && !HONOR_SNANS (TREE_TYPE (lhs))
5309 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5310 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5312 ops.pop ();
5313 negate_result = true;
5317 tree new_lhs = lhs;
5318 /* If the operand vector is now empty, all operands were
5319 consumed by the __builtin_powi optimization. */
5320 if (ops.length () == 0)
5321 transform_stmt_to_copy (&gsi, stmt, powi_result);
5322 else if (ops.length () == 1)
5324 tree last_op = ops.last ()->op;
5326 /* If the stmt that defines operand has to be inserted, insert it
5327 before the use. */
5328 if (ops.last ()->stmt_to_insert)
5329 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5330 if (powi_result)
5331 transform_stmt_to_multiply (&gsi, stmt, last_op,
5332 powi_result);
5333 else
5334 transform_stmt_to_copy (&gsi, stmt, last_op);
5336 else
5338 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5339 int ops_num = ops.length ();
5340 int width = get_reassociation_width (ops_num, rhs_code, mode);
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5343 fprintf (dump_file,
5344 "Width = %d was chosen for reassociation\n", width);
5346 if (width > 1
5347 && ops.length () > 3)
5348 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5349 width, ops);
5350 else
5352 /* When there are three operands left, we want
5353 to make sure the ones that get the double
5354 binary op are chosen wisely. */
5355 int len = ops.length ();
5356 if (len >= 3)
5357 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5359 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5360 powi_result != NULL
5361 || negate_result);
5364 /* If we combined some repeated factors into a
5365 __builtin_powi call, multiply that result by the
5366 reassociated operands. */
5367 if (powi_result)
5369 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5370 tree type = TREE_TYPE (lhs);
5371 tree target_ssa = make_temp_ssa_name (type, NULL,
5372 "reassocpow");
5373 gimple_set_lhs (lhs_stmt, target_ssa);
5374 update_stmt (lhs_stmt);
5375 if (lhs != new_lhs)
5377 target_ssa = new_lhs;
5378 new_lhs = lhs;
5380 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5381 powi_result, target_ssa);
5382 gimple_set_location (mul_stmt, gimple_location (stmt));
5383 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5384 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5388 if (negate_result)
5390 stmt = SSA_NAME_DEF_STMT (lhs);
5391 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5392 gimple_set_lhs (stmt, tmp);
5393 if (lhs != new_lhs)
5394 tmp = new_lhs;
5395 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5396 tmp);
5397 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5398 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5399 update_stmt (stmt);
5404 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5405 son;
5406 son = next_dom_son (CDI_POST_DOMINATORS, son))
5407 cfg_cleanup_needed |= reassociate_bb (son);
5409 return cfg_cleanup_needed;
5412 /* Add jumps around shifts for range tests turned into bit tests.
5413 For each SSA_NAME VAR we have code like:
5414 VAR = ...; // final stmt of range comparison
5415 // bit test here...;
5416 OTHERVAR = ...; // final stmt of the bit test sequence
5417 RES = VAR | OTHERVAR;
5418 Turn the above into:
5419 VAR = ...;
5420 if (VAR != 0)
5421 goto <l3>;
5422 else
5423 goto <l2>;
5424 <l2>:
5425 // bit test here...;
5426 OTHERVAR = ...;
5427 <l3>:
5428 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5430 static void
5431 branch_fixup (void)
5433 tree var;
5434 unsigned int i;
5436 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5438 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5439 gimple *use_stmt;
5440 use_operand_p use;
5441 bool ok = single_imm_use (var, &use, &use_stmt);
5442 gcc_assert (ok
5443 && is_gimple_assign (use_stmt)
5444 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5445 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5447 basic_block cond_bb = gimple_bb (def_stmt);
5448 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5449 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5451 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5452 gimple *g = gimple_build_cond (NE_EXPR, var,
5453 build_zero_cst (TREE_TYPE (var)),
5454 NULL_TREE, NULL_TREE);
5455 location_t loc = gimple_location (use_stmt);
5456 gimple_set_location (g, loc);
5457 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5459 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5460 etrue->probability = REG_BR_PROB_BASE / 2;
5461 etrue->count = cond_bb->count / 2;
5462 edge efalse = find_edge (cond_bb, then_bb);
5463 efalse->flags = EDGE_FALSE_VALUE;
5464 efalse->probability -= etrue->probability;
5465 efalse->count -= etrue->count;
5466 then_bb->count -= etrue->count;
5468 tree othervar = NULL_TREE;
5469 if (gimple_assign_rhs1 (use_stmt) == var)
5470 othervar = gimple_assign_rhs2 (use_stmt);
5471 else if (gimple_assign_rhs2 (use_stmt) == var)
5472 othervar = gimple_assign_rhs1 (use_stmt);
5473 else
5474 gcc_unreachable ();
5475 tree lhs = gimple_assign_lhs (use_stmt);
5476 gphi *phi = create_phi_node (lhs, merge_bb);
5477 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5478 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5479 gsi = gsi_for_stmt (use_stmt);
5480 gsi_remove (&gsi, true);
5482 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5483 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5485 reassoc_branch_fixups.release ();
5488 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5489 void debug_ops_vector (vec<operand_entry *> ops);
5491 /* Dump the operand entry vector OPS to FILE. */
5493 void
5494 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5496 operand_entry *oe;
5497 unsigned int i;
5499 FOR_EACH_VEC_ELT (ops, i, oe)
5501 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5502 print_generic_expr (file, oe->op, 0);
5503 fprintf (file, "\n");
5507 /* Dump the operand entry vector OPS to STDERR. */
5509 DEBUG_FUNCTION void
5510 debug_ops_vector (vec<operand_entry *> ops)
5512 dump_ops_vector (stderr, ops);
5515 /* Bubble up return status from reassociate_bb. */
5517 static bool
5518 do_reassoc (void)
5520 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5521 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5524 /* Initialize the reassociation pass. */
5526 static void
5527 init_reassoc (void)
5529 int i;
5530 long rank = 2;
5531 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5533 /* Find the loops, so that we can prevent moving calculations in
5534 them. */
5535 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5537 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5539 next_operand_entry_id = 0;
5541 /* Reverse RPO (Reverse Post Order) will give us something where
5542 deeper loops come later. */
5543 pre_and_rev_post_order_compute (NULL, bbs, false);
5544 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5545 operand_rank = new hash_map<tree, long>;
5547 /* Give each default definition a distinct rank. This includes
5548 parameters and the static chain. Walk backwards over all
5549 SSA names so that we get proper rank ordering according
5550 to tree_swap_operands_p. */
5551 for (i = num_ssa_names - 1; i > 0; --i)
5553 tree name = ssa_name (i);
5554 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5555 insert_operand_rank (name, ++rank);
5558 /* Set up rank for each BB */
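/* (Added note) The block rank is shifted into the upper bits, presumably
   so that the low 16 bits stay free for the per-statement rank increments
   accumulated inside a single block without colliding with the next
   block's base rank.  */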
5559 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5560 bb_rank[bbs[i]] = ++rank << 16;
5562 free (bbs);
5563 calculate_dominance_info (CDI_POST_DOMINATORS);
5564 plus_negates = vNULL;
5567 /* Cleanup after the reassociation pass, and print stats if
5568 requested. */
5570 static void
5571 fini_reassoc (void)
5573 statistics_counter_event (cfun, "Linearized",
5574 reassociate_stats.linearized);
5575 statistics_counter_event (cfun, "Constants eliminated",
5576 reassociate_stats.constants_eliminated);
5577 statistics_counter_event (cfun, "Ops eliminated",
5578 reassociate_stats.ops_eliminated);
5579 statistics_counter_event (cfun, "Statements rewritten",
5580 reassociate_stats.rewritten);
5581 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5582 reassociate_stats.pows_encountered);
5583 statistics_counter_event (cfun, "Built-in powi calls created",
5584 reassociate_stats.pows_created);
5586 delete operand_rank;
5587 operand_entry_pool.release ();
5588 free (bb_rank);
5589 plus_negates.release ();
5590 free_dominance_info (CDI_POST_DOMINATORS);
5591 loop_optimizer_finalize ();
5594 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5595 insertion of __builtin_powi calls.
5597 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5598 optimization of a gimple conditional. Otherwise returns zero. */
5600 static unsigned int
5601 execute_reassoc (bool insert_powi_p)
5603 reassoc_insert_powi_p = insert_powi_p;
5605 init_reassoc ();
5607 bool cfg_cleanup_needed;
5608 cfg_cleanup_needed = do_reassoc ();
5609 repropagate_negates ();
5610 branch_fixup ();
5612 fini_reassoc ();
5613 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5616 namespace {
5618 const pass_data pass_data_reassoc =
5620 GIMPLE_PASS, /* type */
5621 "reassoc", /* name */
5622 OPTGROUP_NONE, /* optinfo_flags */
5623 TV_TREE_REASSOC, /* tv_id */
5624 ( PROP_cfg | PROP_ssa ), /* properties_required */
5625 0, /* properties_provided */
5626 0, /* properties_destroyed */
5627 0, /* todo_flags_start */
5628 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5631 class pass_reassoc : public gimple_opt_pass
5633 public:
5634 pass_reassoc (gcc::context *ctxt)
5635 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5638 /* opt_pass methods: */
5639 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5640 void set_pass_param (unsigned int n, bool param)
5642 gcc_assert (n == 0);
5643 insert_powi_p = param;
5645 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5646 virtual unsigned int execute (function *)
5647 { return execute_reassoc (insert_powi_p); }
5649 private:
5650 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5651 point 3a in the pass header comment. */
5652 bool insert_powi_p;
5653 }; // class pass_reassoc
5655 } // anon namespace
5657 gimple_opt_pass *
5658 make_pass_reassoc (gcc::context *ctxt)
5660 return new pass_reassoc (ctxt);