gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
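/* For illustration only (a sketch, not a verbatim testcase): given
   input GIMPLE along the lines of

       t1 = a - b;
       t2 = t1 + c;
       r  = t2 - d;

   step 1 turns the subtracts into additions of negates, step 2
   linearizes the chain into the operand vector {a, -b, c, -d}, steps 3
   and 4 optimize that vector and rewrite it in rank order, and step 5
   re-propagates the remaining negates back into subtracts.  */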
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static long *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, long> *operand_rank;
215 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
216 all basic blocks, the CFG should be adjusted - the basic block
217 split right after that SSA_NAME's definition statement and before
218 its only use, which must be a bit ior. */
219 static vec<tree> reassoc_branch_fixups;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple *stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple *end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple *stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple *use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple *phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 long *slot = operand_rank->get (e);
366 return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 gcc_assert (rank > 0);
375 gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
380 static long
381 get_rank (tree e)
383 /* SSA_NAME's have the rank of the expression they are the result
384 of.
385 For globals and uninitialized values, the rank is 0.
386 For function arguments, use the pre-setup rank.
387 For PHI nodes, stores, asm statements, etc, we use the rank of
388 the BB.
389 For simple operations, the rank is the maximum rank of any of
390 its operands, or the bb_rank, whichever is less.
391 I make no claims that this is optimal, however, it gives good
392 results. */
394 /* We make an exception to the normal ranking system to break
395 dependences of accumulator variables in loops. Suppose we
396 have a simple one-block loop containing:
398 x_1 = phi(x_0, x_2)
399 b = a + x_1
400 c = b + d
401 x_2 = c + e
403 As shown, each iteration of the calculation into x is fully
404 dependent upon the iteration before it. We would prefer to
405 see this in the form:
407 x_1 = phi(x_0, x_2)
408 b = a + d
409 c = b + e
410 x_2 = c + x_1
412 If the loop is unrolled, the calculations of b and c from
413 different iterations can be interleaved.
415 To obtain this result during reassociation, we bias the rank
416 of the phi definition x_1 upward, when it is recognized as an
417 accumulator pattern. The artificial rank causes it to be
418 added last, providing the desired independence. */
420 if (TREE_CODE (e) == SSA_NAME)
422 ssa_op_iter iter;
423 gimple *stmt;
424 long rank;
425 tree op;
427 if (SSA_NAME_IS_DEFAULT_DEF (e))
428 return find_operand_rank (e);
430 stmt = SSA_NAME_DEF_STMT (e);
431 if (gimple_code (stmt) == GIMPLE_PHI)
432 return phi_rank (stmt);
434 if (!is_gimple_assign (stmt))
435 return bb_rank[gimple_bb (stmt)->index];
437 /* If we already have a rank for this expression, use that. */
438 rank = find_operand_rank (e);
439 if (rank != -1)
440 return rank;
442 /* Otherwise, find the maximum rank for the operands. As an
443 exception, remove the bias from loop-carried phis when propagating
444 the rank so that dependent operations are not also biased. */
445 /* Simply walk over all SSA uses - this takes advantage of the
446 fact that non-SSA operands are is_gimple_min_invariant and
447 thus have rank 0. */
448 rank = 0;
449 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
450 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Constants, globals, etc., are rank 0 */
465 return 0;
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry *oea = *(const operand_entry *const *)pa;
494 const operand_entry *oeb = *(const operand_entry *const *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied/whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible prefer to sort
517 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
558 operand_entry *oe = operand_entry_pool.allocate ();
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry *oe = operand_entry_pool.allocate ();
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is a reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Return true if STMT is a nop-conversion. */
610 static bool
611 gimple_nop_conversion_p (gimple *stmt)
613 if (gassign *ass = dyn_cast <gassign *> (stmt))
615 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
616 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
617 TREE_TYPE (gimple_assign_rhs1 (ass))))
618 return true;
620 return false;
623 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
624 operand of that operation. Otherwise, return NULL_TREE. */
626 static tree
627 get_unary_op (tree name, enum tree_code opcode)
629 gimple *stmt = SSA_NAME_DEF_STMT (name);
631 /* Look through nop conversions (sign changes). */
632 if (gimple_nop_conversion_p (stmt)
633 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
634 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
636 if (!is_gimple_assign (stmt))
637 return NULL_TREE;
639 if (gimple_assign_rhs_code (stmt) == opcode)
640 return gimple_assign_rhs1 (stmt);
641 return NULL_TREE;
644 /* Return true if OP1 and OP2 have the same value when cast to either type. */
646 static bool
647 ops_equal_values_p (tree op1, tree op2)
649 if (op1 == op2)
650 return true;
652 tree orig_op1 = op1;
653 if (TREE_CODE (op1) == SSA_NAME)
655 gimple *stmt = SSA_NAME_DEF_STMT (op1);
656 if (gimple_nop_conversion_p (stmt))
658 op1 = gimple_assign_rhs1 (stmt);
659 if (op1 == op2)
660 return true;
664 if (TREE_CODE (op2) == SSA_NAME)
666 gimple *stmt = SSA_NAME_DEF_STMT (op2);
667 if (gimple_nop_conversion_p (stmt))
669 op2 = gimple_assign_rhs1 (stmt);
670 if (op1 == op2
671 || orig_op1 == op2)
672 return true;
676 return false;
680 /* If CURR and LAST are a pair of ops that OPCODE allows us to
681 eliminate through equivalences, do so, remove them from OPS, and
682 return true. Otherwise, return false. */
684 static bool
685 eliminate_duplicate_pair (enum tree_code opcode,
686 vec<operand_entry *> *ops,
687 bool *all_done,
688 unsigned int i,
689 operand_entry *curr,
690 operand_entry *last)
693 /* If we have two of the same op, and the opcode is &, |, min, or max,
694 we can eliminate one of them.
695 If we have two of the same op, and the opcode is ^, we can
696 eliminate both of them. */
698 if (last && last->op == curr->op)
700 switch (opcode)
702 case MAX_EXPR:
703 case MIN_EXPR:
704 case BIT_IOR_EXPR:
705 case BIT_AND_EXPR:
706 if (dump_file && (dump_flags & TDF_DETAILS))
708 fprintf (dump_file, "Equivalence: ");
709 print_generic_expr (dump_file, curr->op, 0);
710 fprintf (dump_file, " [&|minmax] ");
711 print_generic_expr (dump_file, last->op, 0);
712 fprintf (dump_file, " -> ");
713 print_generic_stmt (dump_file, last->op, 0);
716 ops->ordered_remove (i);
717 reassociate_stats.ops_eliminated ++;
719 return true;
721 case BIT_XOR_EXPR:
722 if (dump_file && (dump_flags & TDF_DETAILS))
724 fprintf (dump_file, "Equivalence: ");
725 print_generic_expr (dump_file, curr->op, 0);
726 fprintf (dump_file, " ^ ");
727 print_generic_expr (dump_file, last->op, 0);
728 fprintf (dump_file, " -> nothing\n");
731 reassociate_stats.ops_eliminated += 2;
733 if (ops->length () == 2)
735 ops->create (0);
736 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
737 *all_done = true;
739 else
741 ops->ordered_remove (i-1);
742 ops->ordered_remove (i-1);
745 return true;
747 default:
748 break;
751 return false;
754 static vec<tree> plus_negates;
756 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
757 expression, look in OPS for a corresponding positive operation to cancel
758 it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
760 return false. */
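/* An illustrative sketch: for PLUS_EXPR, if OPS contains {x, -x, y},
   the x and -x entries cancel and are replaced by a 0 constant; if OPS
   contains {x, ~x, y}, the x and ~x entries are replaced by -1 (all
   ones), matching the identity x + ~x == -1.  */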
762 static bool
763 eliminate_plus_minus_pair (enum tree_code opcode,
764 vec<operand_entry *> *ops,
765 unsigned int currindex,
766 operand_entry *curr)
768 tree negateop;
769 tree notop;
770 unsigned int i;
771 operand_entry *oe;
773 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 negateop = get_unary_op (curr->op, NEGATE_EXPR);
777 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
778 if (negateop == NULL_TREE && notop == NULL_TREE)
779 return false;
781 /* Any non-negated version will have a rank that is one less than
782 the current rank. So once we hit those ranks, if we don't find
783 one, we can stop. */
785 for (i = currindex + 1;
786 ops->iterate (i, &oe)
787 && oe->rank >= curr->rank - 1 ;
788 i++)
790 if (negateop
791 && ops_equal_values_p (oe->op, negateop))
793 if (dump_file && (dump_flags & TDF_DETAILS))
795 fprintf (dump_file, "Equivalence: ");
796 print_generic_expr (dump_file, negateop, 0);
797 fprintf (dump_file, " + -");
798 print_generic_expr (dump_file, oe->op, 0);
799 fprintf (dump_file, " -> 0\n");
802 ops->ordered_remove (i);
803 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
804 ops->ordered_remove (currindex);
805 reassociate_stats.ops_eliminated ++;
807 return true;
809 else if (notop
810 && ops_equal_values_p (oe->op, notop))
812 tree op_type = TREE_TYPE (oe->op);
814 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, notop, 0);
818 fprintf (dump_file, " + ~");
819 print_generic_expr (dump_file, oe->op, 0);
820 fprintf (dump_file, " -> -1\n");
823 ops->ordered_remove (i);
824 add_to_ops_vec (ops, build_all_ones_cst (op_type));
825 ops->ordered_remove (currindex);
826 reassociate_stats.ops_eliminated ++;
828 return true;
832 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
833 save it for later inspection in repropagate_negates(). */
834 if (negateop != NULL_TREE
835 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
836 plus_negates.safe_push (curr->op);
838 return false;
841 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
842 bitwise not expression, look in OPS for a corresponding operand to
843 cancel it out. If we find one, remove the other from OPS, replace
844 OPS[CURRINDEX] with 0, and return true. Otherwise, return
845 false. */
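/* An illustrative sketch: for BIT_AND_EXPR, finding both x and ~x in
   OPS collapses the whole operand list to the single constant 0; for
   BIT_IOR_EXPR it collapses to -1 (all ones), since x & ~x == 0 and
   x | ~x == -1.  */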
847 static bool
848 eliminate_not_pairs (enum tree_code opcode,
849 vec<operand_entry *> *ops,
850 unsigned int currindex,
851 operand_entry *curr)
853 tree notop;
854 unsigned int i;
855 operand_entry *oe;
857 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
858 || TREE_CODE (curr->op) != SSA_NAME)
859 return false;
861 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
862 if (notop == NULL_TREE)
863 return false;
865 /* Any non-not version will have a rank that is one less than
866 the current rank. So once we hit those ranks, if we don't find
867 one, we can stop. */
869 for (i = currindex + 1;
870 ops->iterate (i, &oe)
871 && oe->rank >= curr->rank - 1;
872 i++)
874 if (oe->op == notop)
876 if (dump_file && (dump_flags & TDF_DETAILS))
878 fprintf (dump_file, "Equivalence: ");
879 print_generic_expr (dump_file, notop, 0);
880 if (opcode == BIT_AND_EXPR)
881 fprintf (dump_file, " & ~");
882 else if (opcode == BIT_IOR_EXPR)
883 fprintf (dump_file, " | ~");
884 print_generic_expr (dump_file, oe->op, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " -> 0\n");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " -> -1\n");
891 if (opcode == BIT_AND_EXPR)
892 oe->op = build_zero_cst (TREE_TYPE (oe->op));
893 else if (opcode == BIT_IOR_EXPR)
894 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
896 reassociate_stats.ops_eliminated += ops->length () - 1;
897 ops->truncate (0);
898 ops->quick_push (oe);
899 return true;
903 return false;
906 /* Use constant value that may be present in OPS to try to eliminate
907 operands. Note that this function is only really used when we've
908 eliminated ops for other reasons, or merged constants. Across
909 single statements, fold already does all of this, plus more. There
910 is little point in duplicating logic, so I've only included the
911 identities that I could ever construct testcases to trigger. */
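/* The identities handled below are, roughly: x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0 (for floats only when NaNs and
   signed zeros need not be honored), x * 1 -> x, and x [+^-] 0 -> x
   (for floats only when the zero addition is foldable).  */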
913 static void
914 eliminate_using_constants (enum tree_code opcode,
915 vec<operand_entry *> *ops)
917 operand_entry *oelast = ops->last ();
918 tree type = TREE_TYPE (oelast->op);
920 if (oelast->rank == 0
921 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
923 switch (opcode)
925 case BIT_AND_EXPR:
926 if (integer_zerop (oelast->op))
928 if (ops->length () != 1)
930 if (dump_file && (dump_flags & TDF_DETAILS))
931 fprintf (dump_file, "Found & 0, removing all other ops\n");
933 reassociate_stats.ops_eliminated += ops->length () - 1;
935 ops->truncate (0);
936 ops->quick_push (oelast);
937 return;
940 else if (integer_all_onesp (oelast->op))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found & -1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
950 break;
951 case BIT_IOR_EXPR:
952 if (integer_all_onesp (oelast->op))
954 if (ops->length () != 1)
956 if (dump_file && (dump_flags & TDF_DETAILS))
957 fprintf (dump_file, "Found | -1, removing all other ops\n");
959 reassociate_stats.ops_eliminated += ops->length () - 1;
961 ops->truncate (0);
962 ops->quick_push (oelast);
963 return;
966 else if (integer_zerop (oelast->op))
968 if (ops->length () != 1)
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 fprintf (dump_file, "Found | 0, removing\n");
972 ops->pop ();
973 reassociate_stats.ops_eliminated++;
976 break;
977 case MULT_EXPR:
978 if (integer_zerop (oelast->op)
979 || (FLOAT_TYPE_P (type)
980 && !HONOR_NANS (type)
981 && !HONOR_SIGNED_ZEROS (type)
982 && real_zerop (oelast->op)))
984 if (ops->length () != 1)
986 if (dump_file && (dump_flags & TDF_DETAILS))
987 fprintf (dump_file, "Found * 0, removing all other ops\n");
989 reassociate_stats.ops_eliminated += ops->length () - 1;
990 ops->truncate (1);
991 ops->quick_push (oelast);
992 return;
995 else if (integer_onep (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_SNANS (type)
998 && real_onep (oelast->op)))
1000 if (ops->length () != 1)
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1003 fprintf (dump_file, "Found * 1, removing\n");
1004 ops->pop ();
1005 reassociate_stats.ops_eliminated++;
1006 return;
1009 break;
1010 case BIT_XOR_EXPR:
1011 case PLUS_EXPR:
1012 case MINUS_EXPR:
1013 if (integer_zerop (oelast->op)
1014 || (FLOAT_TYPE_P (type)
1015 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1016 && fold_real_zero_addition_p (type, oelast->op,
1017 opcode == MINUS_EXPR)))
1019 if (ops->length () != 1)
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "Found [|^+] 0, removing\n");
1023 ops->pop ();
1024 reassociate_stats.ops_eliminated++;
1025 return;
1028 break;
1029 default:
1030 break;
1036 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1037 bool, bool);
1039 /* Structure for tracking and counting operands. */
1040 struct oecount {
1041 int cnt;
1042 int id;
1043 enum tree_code oecode;
1044 tree op;
1048 /* The heap for the oecount hashtable and the sorted list of operands. */
1049 static vec<oecount> cvec;
1052 /* Oecount hashtable helpers. */
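/* The hash table below does not store oecounts directly; it stores
   indices into CVEC biased by 42, because int_hash <int, 0, 1>
   reserves the values 0 and 1 for empty and deleted slots.  Hence the
   "- 42" when looking an entry up here, and the "cvec.length () + 41"
   (index of the just-pushed element plus 42) used when inserting in
   undistribute_ops_list.  */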
1054 struct oecount_hasher : int_hash <int, 0, 1>
1056 static inline hashval_t hash (int);
1057 static inline bool equal (int, int);
1060 /* Hash function for oecount. */
1062 inline hashval_t
1063 oecount_hasher::hash (int p)
1065 const oecount *c = &cvec[p - 42];
1066 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1069 /* Comparison function for oecount. */
1071 inline bool
1072 oecount_hasher::equal (int p1, int p2)
1074 const oecount *c1 = &cvec[p1 - 42];
1075 const oecount *c2 = &cvec[p2 - 42];
1076 return (c1->oecode == c2->oecode
1077 && c1->op == c2->op);
1080 /* Comparison function for qsort sorting oecount elements by count. */
1082 static int
1083 oecount_cmp (const void *p1, const void *p2)
1085 const oecount *c1 = (const oecount *)p1;
1086 const oecount *c2 = (const oecount *)p2;
1087 if (c1->cnt != c2->cnt)
1088 return c1->cnt - c2->cnt;
1089 else
1090 /* If counts are identical, use unique IDs to stabilize qsort. */
1091 return c1->id - c2->id;
1094 /* Return TRUE iff STMT represents a builtin call that raises OP
1095 to some exponent. */
1097 static bool
1098 stmt_is_power_of_op (gimple *stmt, tree op)
1100 if (!is_gimple_call (stmt))
1101 return false;
1103 switch (gimple_call_combined_fn (stmt))
1105 CASE_CFN_POW:
1106 CASE_CFN_POWI:
1107 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1109 default:
1110 return false;
1114 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1115 in place and return the result. Assumes that stmt_is_power_of_op
1116 was previously called for STMT and returned TRUE. */
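/* For example, a call __builtin_powi (x, 5) is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned; likewise pow (x, 3.0)
   becomes pow (x, 2.0) and 2 is returned.  */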
1118 static HOST_WIDE_INT
1119 decrement_power (gimple *stmt)
1121 REAL_VALUE_TYPE c, cint;
1122 HOST_WIDE_INT power;
1123 tree arg1;
1125 switch (gimple_call_combined_fn (stmt))
1127 CASE_CFN_POW:
1128 arg1 = gimple_call_arg (stmt, 1);
1129 c = TREE_REAL_CST (arg1);
1130 power = real_to_integer (&c) - 1;
1131 real_from_integer (&cint, VOIDmode, power, SIGNED);
1132 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1133 return power;
1135 CASE_CFN_POWI:
1136 arg1 = gimple_call_arg (stmt, 1);
1137 power = TREE_INT_CST_LOW (arg1) - 1;
1138 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1139 return power;
1141 default:
1142 gcc_unreachable ();
1146 /* Find the single immediate use of STMT's LHS, and replace it
1147 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1148 replace *DEF with OP as well. */
1150 static void
1151 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1153 tree lhs;
1154 gimple *use_stmt;
1155 use_operand_p use;
1156 gimple_stmt_iterator gsi;
1158 if (is_gimple_call (stmt))
1159 lhs = gimple_call_lhs (stmt);
1160 else
1161 lhs = gimple_assign_lhs (stmt);
1163 gcc_assert (has_single_use (lhs));
1164 single_imm_use (lhs, &use, &use_stmt);
1165 if (lhs == *def)
1166 *def = op;
1167 SET_USE (use, op);
1168 if (TREE_CODE (op) != SSA_NAME)
1169 update_stmt (use_stmt);
1170 gsi = gsi_for_stmt (stmt);
1171 unlink_stmt_vdef (stmt);
1172 reassoc_remove_stmt (&gsi);
1173 release_defs (stmt);
1176 /* Walks the linear chain with result *DEF searching for an operation
1177 with operand OP and code OPCODE, removing that operation from the chain. *DEF
1178 is updated if there is only one operand but no operation left. */
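/* An illustrative sketch: if *DEF is t2 in the chain

       t1 = a * x;
       t2 = t1 * b;

   and we are asked to remove OP == x with OPCODE == MULT_EXPR, the
   statement defining t1 is deleted and its other operand a is
   propagated into the single use, leaving t2 = a * b.  */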
1180 static void
1181 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1183 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1187 tree name;
1189 if (opcode == MULT_EXPR
1190 && stmt_is_power_of_op (stmt, op))
1192 if (decrement_power (stmt) == 1)
1193 propagate_op_to_single_use (op, stmt, def);
1194 return;
1197 name = gimple_assign_rhs1 (stmt);
1199 /* If this is the operation we look for and one of the operands
1200 is ours simply propagate the other operand into the stmts
1201 single use. */
1202 if (gimple_assign_rhs_code (stmt) == opcode
1203 && (name == op
1204 || gimple_assign_rhs2 (stmt) == op))
1206 if (name == op)
1207 name = gimple_assign_rhs2 (stmt);
1208 propagate_op_to_single_use (name, stmt, def);
1209 return;
1212 /* We might have a multiply of two __builtin_pow* calls, and
1213 the operand might be hiding in the rightmost one. */
1214 if (opcode == MULT_EXPR
1215 && gimple_assign_rhs_code (stmt) == opcode
1216 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1217 && has_single_use (gimple_assign_rhs2 (stmt)))
1219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1220 if (stmt_is_power_of_op (stmt2, op))
1222 if (decrement_power (stmt2) == 1)
1223 propagate_op_to_single_use (op, stmt2, def);
1224 return;
1228 /* Continue walking the chain. */
1229 gcc_assert (name != op
1230 && TREE_CODE (name) == SSA_NAME);
1231 stmt = SSA_NAME_DEF_STMT (name);
1233 while (1);
1236 /* Returns true if statement S1 dominates statement S2. Like
1237 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1239 static bool
1240 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1242 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1244 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a (D)
1245 SSA_NAME. Assume it lives at the beginning of the function and
1246 thus dominates everything. */
1247 if (!bb1 || s1 == s2)
1248 return true;
1250 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1251 if (!bb2)
1252 return false;
1254 if (bb1 == bb2)
1256 /* PHIs in the same basic block are assumed to be
1257 executed all in parallel; if only one stmt is a PHI,
1258 it dominates the other stmt in the same basic block. */
1259 if (gimple_code (s1) == GIMPLE_PHI)
1260 return true;
1262 if (gimple_code (s2) == GIMPLE_PHI)
1263 return false;
1265 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1267 if (gimple_uid (s1) < gimple_uid (s2))
1268 return true;
1270 if (gimple_uid (s1) > gimple_uid (s2))
1271 return false;
1273 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1274 unsigned int uid = gimple_uid (s1);
1275 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1277 gimple *s = gsi_stmt (gsi);
1278 if (gimple_uid (s) != uid)
1279 break;
1280 if (s == s2)
1281 return true;
1284 return false;
1287 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1290 /* Insert STMT after INSERT_POINT. */
1292 static void
1293 insert_stmt_after (gimple *stmt, gimple *insert_point)
1295 gimple_stmt_iterator gsi;
1296 basic_block bb;
1298 if (gimple_code (insert_point) == GIMPLE_PHI)
1299 bb = gimple_bb (insert_point);
1300 else if (!stmt_ends_bb_p (insert_point))
1302 gsi = gsi_for_stmt (insert_point);
1303 gimple_set_uid (stmt, gimple_uid (insert_point));
1304 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1305 return;
1307 else
1308 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1309 thus if it must end a basic block, it should be a call that can
1310 throw, or some assignment that can throw. If it throws, the LHS
1311 of it will not be initialized though, so only valid places using
1312 the SSA_NAME should be dominated by the fallthru edge. */
1313 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1314 gsi = gsi_after_labels (bb);
1315 if (gsi_end_p (gsi))
1317 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1318 gimple_set_uid (stmt,
1319 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1321 else
1322 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1323 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1326 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1327 SSA name of type TYPE for the result. Places the statement after
1328 the definition of either OP1 or OP2. Returns the new statement. */
1330 static gimple *
1331 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1333 gimple *op1def = NULL, *op2def = NULL;
1334 gimple_stmt_iterator gsi;
1335 tree op;
1336 gassign *sum;
1338 /* Create the addition statement. */
1339 op = make_ssa_name (type);
1340 sum = gimple_build_assign (op, opcode, op1, op2);
1342 /* Find an insertion place and insert. */
1343 if (TREE_CODE (op1) == SSA_NAME)
1344 op1def = SSA_NAME_DEF_STMT (op1);
1345 if (TREE_CODE (op2) == SSA_NAME)
1346 op2def = SSA_NAME_DEF_STMT (op2);
1347 if ((!op1def || gimple_nop_p (op1def))
1348 && (!op2def || gimple_nop_p (op2def)))
1350 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1351 if (gsi_end_p (gsi))
1353 gimple_stmt_iterator gsi2
1354 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355 gimple_set_uid (sum,
1356 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1358 else
1359 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1360 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1362 else
1364 gimple *insert_point;
1365 if ((!op1def || gimple_nop_p (op1def))
1366 || (op2def && !gimple_nop_p (op2def)
1367 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1368 insert_point = op2def;
1369 else
1370 insert_point = op1def;
1371 insert_stmt_after (sum, insert_point);
1373 update_stmt (sum);
1375 return sum;
1378 /* Perform un-distribution of divisions and multiplications.
1379 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1380 to (A + B) / X for real X.
1382 The algorithm is organized as follows.
1384 - First we walk the addition chain *OPS looking for summands that
1385 are defined by a multiplication or a real division. This results
1386 in the candidates bitmap with relevant indices into *OPS.
1388 - Second we build the chains of multiplications or divisions for
1389 these candidates, counting the number of occurrences of (operand, code)
1390 pairs in all of the candidates chains.
1392 - Third we sort the (operand, code) pairs by number of occurrence and
1393 process them starting with the pair with the most uses.
1395 * For each such pair we walk the candidates again to build a
1396 second candidate bitmap noting all multiplication/division chains
1397 that have at least one occurrence of (operand, code).
1399 * We build an alternate addition chain only covering these
1400 candidates with one (operand, code) operation removed from their
1401 multiplication/division chain.
1403 * The first candidate gets replaced by the alternate addition chain
1404 multiplied/divided by the operand.
1406 * All candidate chains get disabled for further processing and
1407 processing of (operand, code) pairs continues.
1409 The alternate addition chains built are re-processed by the main
1410 reassociation algorithm which allows optimizing a * x * y + b * y * x
1411 to (a + b ) * x * y in one invocation of the reassociation pass. */
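/* An illustrative sketch: for the addition chain a * x + b * x + c the
   counting table records the pair (x, MULT_EXPR) with count 2 (and a,
   b, c with count 1 each).  Processing that pair removes the x factor
   from both candidate chains, builds the alternate chain a + b,
   multiplies it by x, and leaves the outer chain as (a + b) * x + 0 + c
   for the normal reassociation machinery to finish.  */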
1413 static bool
1414 undistribute_ops_list (enum tree_code opcode,
1415 vec<operand_entry *> *ops, struct loop *loop)
1417 unsigned int length = ops->length ();
1418 operand_entry *oe1;
1419 unsigned i, j;
1420 sbitmap candidates, candidates2;
1421 unsigned nr_candidates, nr_candidates2;
1422 sbitmap_iterator sbi0;
1423 vec<operand_entry *> *subops;
1424 bool changed = false;
1425 int next_oecount_id = 0;
1427 if (length <= 1
1428 || opcode != PLUS_EXPR)
1429 return false;
1431 /* Build a list of candidates to process. */
1432 candidates = sbitmap_alloc (length);
1433 bitmap_clear (candidates);
1434 nr_candidates = 0;
1435 FOR_EACH_VEC_ELT (*ops, i, oe1)
1437 enum tree_code dcode;
1438 gimple *oe1def;
1440 if (TREE_CODE (oe1->op) != SSA_NAME)
1441 continue;
1442 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1443 if (!is_gimple_assign (oe1def))
1444 continue;
1445 dcode = gimple_assign_rhs_code (oe1def);
1446 if ((dcode != MULT_EXPR
1447 && dcode != RDIV_EXPR)
1448 || !is_reassociable_op (oe1def, dcode, loop))
1449 continue;
1451 bitmap_set_bit (candidates, i);
1452 nr_candidates++;
1455 if (nr_candidates < 2)
1457 sbitmap_free (candidates);
1458 return false;
1461 if (dump_file && (dump_flags & TDF_DETAILS))
1463 fprintf (dump_file, "searching for un-distribute opportunities ");
1464 print_generic_expr (dump_file,
1465 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1466 fprintf (dump_file, " %d\n", nr_candidates);
1469 /* Build linearized sub-operand lists and the counting table. */
1470 cvec.create (0);
1472 hash_table<oecount_hasher> ctable (15);
1474 /* ??? Macro arguments cannot have multi-argument template types in
1475 them. This typedef is needed to work around that limitation. */
1476 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1477 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1478 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1480 gimple *oedef;
1481 enum tree_code oecode;
1482 unsigned j;
1484 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1485 oecode = gimple_assign_rhs_code (oedef);
1486 linearize_expr_tree (&subops[i], oedef,
1487 associative_tree_code (oecode), false);
1489 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1491 oecount c;
1492 int *slot;
1493 int idx;
1494 c.oecode = oecode;
1495 c.cnt = 1;
1496 c.id = next_oecount_id++;
1497 c.op = oe1->op;
1498 cvec.safe_push (c);
1499 idx = cvec.length () + 41;
1500 slot = ctable.find_slot (idx, INSERT);
1501 if (!*slot)
1503 *slot = idx;
1505 else
1507 cvec.pop ();
1508 cvec[*slot - 42].cnt++;
1513 /* Sort the counting table. */
1514 cvec.qsort (oecount_cmp);
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 oecount *c;
1519 fprintf (dump_file, "Candidates:\n");
1520 FOR_EACH_VEC_ELT (cvec, j, c)
1522 fprintf (dump_file, " %u %s: ", c->cnt,
1523 c->oecode == MULT_EXPR
1524 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1525 print_generic_expr (dump_file, c->op, 0);
1526 fprintf (dump_file, "\n");
1530 /* Process the (operand, code) pairs in order of most occurrence. */
1531 candidates2 = sbitmap_alloc (length);
1532 while (!cvec.is_empty ())
1534 oecount *c = &cvec.last ();
1535 if (c->cnt < 2)
1536 break;
1538 /* Now collect the operands in the outer chain that contain
1539 the common operand in their inner chain. */
1540 bitmap_clear (candidates2);
1541 nr_candidates2 = 0;
1542 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1544 gimple *oedef;
1545 enum tree_code oecode;
1546 unsigned j;
1547 tree op = (*ops)[i]->op;
1549 /* If we undistributed in this chain already this may be
1550 a constant. */
1551 if (TREE_CODE (op) != SSA_NAME)
1552 continue;
1554 oedef = SSA_NAME_DEF_STMT (op);
1555 oecode = gimple_assign_rhs_code (oedef);
1556 if (oecode != c->oecode)
1557 continue;
1559 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1561 if (oe1->op == c->op)
1563 bitmap_set_bit (candidates2, i);
1564 ++nr_candidates2;
1565 break;
1570 if (nr_candidates2 >= 2)
1572 operand_entry *oe1, *oe2;
1573 gimple *prod;
1574 int first = bitmap_first_set_bit (candidates2);
1576 /* Build the new addition chain. */
1577 oe1 = (*ops)[first];
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1580 fprintf (dump_file, "Building (");
1581 print_generic_expr (dump_file, oe1->op, 0);
1583 zero_one_operation (&oe1->op, c->oecode, c->op);
1584 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1586 gimple *sum;
1587 oe2 = (*ops)[i];
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, " + ");
1591 print_generic_expr (dump_file, oe2->op, 0);
1593 zero_one_operation (&oe2->op, c->oecode, c->op);
1594 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1595 oe1->op, oe2->op, opcode);
1596 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1597 oe2->rank = 0;
1598 oe1->op = gimple_get_lhs (sum);
1601 /* Apply the multiplication/division. */
1602 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1603 oe1->op, c->op, c->oecode);
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1607 print_generic_expr (dump_file, c->op, 0);
1608 fprintf (dump_file, "\n");
1611 /* Record it in the addition chain and disable further
1612 undistribution with this op. */
1613 oe1->op = gimple_assign_lhs (prod);
1614 oe1->rank = get_rank (oe1->op);
1615 subops[first].release ();
1617 changed = true;
1620 cvec.pop ();
1623 for (i = 0; i < ops->length (); ++i)
1624 subops[i].release ();
1625 free (subops);
1626 cvec.release ();
1627 sbitmap_free (candidates);
1628 sbitmap_free (candidates2);
1630 return changed;
1633 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1634 expression, examine the other OPS to see if any of them are comparisons
1635 of the same values, which we may be able to combine or eliminate.
1636 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1638 static bool
1639 eliminate_redundant_comparison (enum tree_code opcode,
1640 vec<operand_entry *> *ops,
1641 unsigned int currindex,
1642 operand_entry *curr)
1644 tree op1, op2;
1645 enum tree_code lcode, rcode;
1646 gimple *def1, *def2;
1647 int i;
1648 operand_entry *oe;
1650 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1651 return false;
1653 /* Check that CURR is a comparison. */
1654 if (TREE_CODE (curr->op) != SSA_NAME)
1655 return false;
1656 def1 = SSA_NAME_DEF_STMT (curr->op);
1657 if (!is_gimple_assign (def1))
1658 return false;
1659 lcode = gimple_assign_rhs_code (def1);
1660 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1661 return false;
1662 op1 = gimple_assign_rhs1 (def1);
1663 op2 = gimple_assign_rhs2 (def1);
1665 /* Now look for a similar comparison in the remaining OPS. */
1666 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1668 tree t;
1670 if (TREE_CODE (oe->op) != SSA_NAME)
1671 continue;
1672 def2 = SSA_NAME_DEF_STMT (oe->op);
1673 if (!is_gimple_assign (def2))
1674 continue;
1675 rcode = gimple_assign_rhs_code (def2);
1676 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1677 continue;
1679 /* If we got here, we have a match. See if we can combine the
1680 two comparisons. */
1681 if (opcode == BIT_IOR_EXPR)
1682 t = maybe_fold_or_comparisons (lcode, op1, op2,
1683 rcode, gimple_assign_rhs1 (def2),
1684 gimple_assign_rhs2 (def2));
1685 else
1686 t = maybe_fold_and_comparisons (lcode, op1, op2,
1687 rcode, gimple_assign_rhs1 (def2),
1688 gimple_assign_rhs2 (def2));
1689 if (!t)
1690 continue;
1692 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1693 always give us a boolean_type_node value back. If the original
1694 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1695 we need to convert. */
1696 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1697 t = fold_convert (TREE_TYPE (curr->op), t);
1699 if (TREE_CODE (t) != INTEGER_CST
1700 && !operand_equal_p (t, curr->op, 0))
1702 enum tree_code subcode;
1703 tree newop1, newop2;
1704 if (!COMPARISON_CLASS_P (t))
1705 continue;
1706 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707 STRIP_USELESS_TYPE_CONVERSION (newop1);
1708 STRIP_USELESS_TYPE_CONVERSION (newop2);
1709 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1710 continue;
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1715 fprintf (dump_file, "Equivalence: ");
1716 print_generic_expr (dump_file, curr->op, 0);
1717 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1718 print_generic_expr (dump_file, oe->op, 0);
1719 fprintf (dump_file, " -> ");
1720 print_generic_expr (dump_file, t, 0);
1721 fprintf (dump_file, "\n");
1724 /* Now we can delete oe, as it has been subsumed by the new combined
1725 expression t. */
1726 ops->ordered_remove (i);
1727 reassociate_stats.ops_eliminated ++;
1729 /* If t is the same as curr->op, we're done. Otherwise we must
1730 replace curr->op with t. The special case is when we got a constant
1731 back, in which case we add it to the end instead of in place of
1732 the current entry. */
1733 if (TREE_CODE (t) == INTEGER_CST)
1735 ops->ordered_remove (currindex);
1736 add_to_ops_vec (ops, t);
1738 else if (!operand_equal_p (t, curr->op, 0))
1740 gimple *sum;
1741 enum tree_code subcode;
1742 tree newop1;
1743 tree newop2;
1744 gcc_assert (COMPARISON_CLASS_P (t));
1745 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746 STRIP_USELESS_TYPE_CONVERSION (newop1);
1747 STRIP_USELESS_TYPE_CONVERSION (newop2);
1748 gcc_checking_assert (is_gimple_val (newop1)
1749 && is_gimple_val (newop2));
1750 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1751 curr->op = gimple_get_lhs (sum);
1753 return true;
1756 return false;
1759 /* Perform various identities and other optimizations on the list of
1760 operand entries, stored in OPS. The tree code for the binary
1761 operation between all the operands is OPCODE. */
1763 static void
1764 optimize_ops_list (enum tree_code opcode,
1765 vec<operand_entry *> *ops)
1767 unsigned int length = ops->length ();
1768 unsigned int i;
1769 operand_entry *oe;
1770 operand_entry *oelast = NULL;
1771 bool iterate = false;
1773 if (length == 1)
1774 return;
1776 oelast = ops->last ();
1778 /* If the last two are constants, pop the constants off, merge them
1779 and try the next two. */
1780 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1782 operand_entry *oelm1 = (*ops)[length - 2];
1784 if (oelm1->rank == 0
1785 && is_gimple_min_invariant (oelm1->op)
1786 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1787 TREE_TYPE (oelast->op)))
1789 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1790 oelm1->op, oelast->op);
1792 if (folded && is_gimple_min_invariant (folded))
1794 if (dump_file && (dump_flags & TDF_DETAILS))
1795 fprintf (dump_file, "Merging constants\n");
1797 ops->pop ();
1798 ops->pop ();
1800 add_to_ops_vec (ops, folded);
1801 reassociate_stats.constants_eliminated++;
1803 optimize_ops_list (opcode, ops);
1804 return;
1809 eliminate_using_constants (opcode, ops);
1810 oelast = NULL;
1812 for (i = 0; ops->iterate (i, &oe);)
1814 bool done = false;
1816 if (eliminate_not_pairs (opcode, ops, i, oe))
1817 return;
1818 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1819 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1820 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1822 if (done)
1823 return;
1824 iterate = true;
1825 oelast = NULL;
1826 continue;
1828 oelast = oe;
1829 i++;
1832 length = ops->length ();
1833 oelast = ops->last ();
1835 if (iterate)
1836 optimize_ops_list (opcode, ops);
1839 /* The following functions are subroutines to optimize_range_tests and allow
1840 it to try to change a logical combination of comparisons into a range
1841 test.
1843 For example, both
1844 X == 2 || X == 5 || X == 3 || X == 4
1845 and
1846 X >= 2 && X <= 5
1847 are converted to
1848 (unsigned) (X - 2) <= 3
1850 For more information see comments above fold_test_range in fold-const.c,
1851 this implementation is for GIMPLE. */
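/* Roughly, as a sketch of the data flow: each comparison becomes a
   range_entry, e.g. X == 2 becomes the range +[2, 2] on X and X >= 2
   becomes +[2, -].  Entries on the same SSA_NAME are then merged, so
   X == 2 || X == 3 || X == 4 || X == 5 becomes +[2, 5], which
   build_range_check emits as (unsigned) (X - 2) <= 3.  */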
1853 struct range_entry
1855 tree exp;
1856 tree low;
1857 tree high;
1858 bool in_p;
1859 bool strict_overflow_p;
1860 unsigned int idx, next;
1863 /* This is similar to make_range in fold-const.c, but on top of
1864 GIMPLE instead of trees. If EXP is non-NULL, it should be
1865 an SSA_NAME and STMT argument is ignored, otherwise STMT
1866 argument should be a GIMPLE_COND. */
1868 static void
1869 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1871 int in_p;
1872 tree low, high;
1873 bool is_bool, strict_overflow_p;
1875 r->exp = NULL_TREE;
1876 r->in_p = false;
1877 r->strict_overflow_p = false;
1878 r->low = NULL_TREE;
1879 r->high = NULL_TREE;
1880 if (exp != NULL_TREE
1881 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1882 return;
1884 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1885 and see if we can refine the range. Some of the cases below may not
1886 happen, but it doesn't seem worth worrying about this. We "continue"
1887 the outer loop when we've changed something; otherwise we "break"
1888 the switch, which will "break" the while. */
1889 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1890 high = low;
1891 in_p = 0;
1892 strict_overflow_p = false;
1893 is_bool = false;
1894 if (exp == NULL_TREE)
1895 is_bool = true;
1896 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1898 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1899 is_bool = true;
1900 else
1901 return;
1903 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1904 is_bool = true;
1906 while (1)
1908 enum tree_code code;
1909 tree arg0, arg1, exp_type;
1910 tree nexp;
1911 location_t loc;
1913 if (exp != NULL_TREE)
1915 if (TREE_CODE (exp) != SSA_NAME
1916 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1917 break;
1919 stmt = SSA_NAME_DEF_STMT (exp);
1920 if (!is_gimple_assign (stmt))
1921 break;
1923 code = gimple_assign_rhs_code (stmt);
1924 arg0 = gimple_assign_rhs1 (stmt);
1925 arg1 = gimple_assign_rhs2 (stmt);
1926 exp_type = TREE_TYPE (exp);
1928 else
1930 code = gimple_cond_code (stmt);
1931 arg0 = gimple_cond_lhs (stmt);
1932 arg1 = gimple_cond_rhs (stmt);
1933 exp_type = boolean_type_node;
1936 if (TREE_CODE (arg0) != SSA_NAME)
1937 break;
1938 loc = gimple_location (stmt);
1939 switch (code)
1941 case BIT_NOT_EXPR:
1942 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1943 /* Ensure the range is either +[-,0], +[0,0],
1944 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1945 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1946 or similar expression of unconditional true or
1947 false, it should not be negated. */
1948 && ((high && integer_zerop (high))
1949 || (low && integer_onep (low))))
1951 in_p = !in_p;
1952 exp = arg0;
1953 continue;
1955 break;
1956 case SSA_NAME:
1957 exp = arg0;
1958 continue;
1959 CASE_CONVERT:
1960 if (is_bool)
1961 goto do_default;
1962 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1964 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1965 is_bool = true;
1966 else
1967 return;
1969 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1970 is_bool = true;
1971 goto do_default;
1972 case EQ_EXPR:
1973 case NE_EXPR:
1974 case LT_EXPR:
1975 case LE_EXPR:
1976 case GE_EXPR:
1977 case GT_EXPR:
1978 is_bool = true;
1979 /* FALLTHRU */
1980 default:
1981 if (!is_bool)
1982 return;
1983 do_default:
1984 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1985 &low, &high, &in_p,
1986 &strict_overflow_p);
1987 if (nexp != NULL_TREE)
1989 exp = nexp;
1990 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1991 continue;
1993 break;
1995 break;
1997 if (is_bool)
1999 r->exp = exp;
2000 r->in_p = in_p;
2001 r->low = low;
2002 r->high = high;
2003 r->strict_overflow_p = strict_overflow_p;
2007 /* Comparison function for qsort. Sort entries
2008 without SSA_NAME exp first, then with SSA_NAMEs sorted
2009 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2010 by increasing ->low and if ->low is the same, by increasing
2011 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2012 maximum. */
2014 static int
2015 range_entry_cmp (const void *a, const void *b)
2017 const struct range_entry *p = (const struct range_entry *) a;
2018 const struct range_entry *q = (const struct range_entry *) b;
2020 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2022 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2024 /* Group range_entries for the same SSA_NAME together. */
2025 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2026 return -1;
2027 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2028 return 1;
2029 /* If ->low is different, NULL low goes first, then by
2030 ascending low. */
2031 if (p->low != NULL_TREE)
2033 if (q->low != NULL_TREE)
2035 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2036 p->low, q->low);
2037 if (tem && integer_onep (tem))
2038 return -1;
2039 tem = fold_binary (GT_EXPR, boolean_type_node,
2040 p->low, q->low);
2041 if (tem && integer_onep (tem))
2042 return 1;
2044 else
2045 return 1;
2047 else if (q->low != NULL_TREE)
2048 return -1;
2049 /* If ->high is different, NULL high goes last, before that by
2050 ascending high. */
2051 if (p->high != NULL_TREE)
2053 if (q->high != NULL_TREE)
2055 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2056 p->high, q->high);
2057 if (tem && integer_onep (tem))
2058 return -1;
2059 tem = fold_binary (GT_EXPR, boolean_type_node,
2060 p->high, q->high);
2061 if (tem && integer_onep (tem))
2062 return 1;
2064 else
2065 return -1;
2067 else if (q->high != NULL_TREE)
2068 return 1;
2069 /* If both ranges are the same, sort below by ascending idx. */
2071 else
2072 return 1;
2074 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2075 return -1;
2077 if (p->idx < q->idx)
2078 return -1;
2079 else
2081 gcc_checking_assert (p->idx > q->idx);
2082 return 1;
2086 /* Helper routine of optimize_range_test.
2087 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2088 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2089 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2090 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2091 an array of COUNT pointers to other ranges. Return
2092 true if the range merge has been successful.
2093 If OPCODE is ERROR_MARK, this is called from within
2094 maybe_optimize_range_tests and is performing inter-bb range optimization.
2095 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2096 oe->rank. */
2098 static bool
2099 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2100 struct range_entry **otherrangep,
2101 unsigned int count, enum tree_code opcode,
2102 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2103 bool in_p, tree low, tree high, bool strict_overflow_p)
2105 operand_entry *oe = (*ops)[range->idx];
2106 tree op = oe->op;
2107 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2108 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2109 location_t loc = gimple_location (stmt);
2110 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2111 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2112 in_p, low, high);
2113 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2114 gimple_stmt_iterator gsi;
2115 unsigned int i, uid;
2117 if (tem == NULL_TREE)
2118 return false;
2120 /* If op is default def SSA_NAME, there is no place to insert the
2121 new comparison. Give up, unless we can use OP itself as the
2122 range test. */
2123 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2125 if (op == range->exp
2126 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2127 || TREE_CODE (optype) == BOOLEAN_TYPE)
2128 && (op == tem
2129 || (TREE_CODE (tem) == EQ_EXPR
2130 && TREE_OPERAND (tem, 0) == op
2131 && integer_onep (TREE_OPERAND (tem, 1))))
2132 && opcode != BIT_IOR_EXPR
2133 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2135 stmt = NULL;
2136 tem = op;
2138 else
2139 return false;
2142 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2143 warning_at (loc, OPT_Wstrict_overflow,
2144 "assuming signed overflow does not occur "
2145 "when simplifying range test");
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2149 struct range_entry *r;
2150 fprintf (dump_file, "Optimizing range tests ");
2151 print_generic_expr (dump_file, range->exp, 0);
2152 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2153 print_generic_expr (dump_file, range->low, 0);
2154 fprintf (dump_file, ", ");
2155 print_generic_expr (dump_file, range->high, 0);
2156 fprintf (dump_file, "]");
2157 for (i = 0; i < count; i++)
2159 if (otherrange)
2160 r = otherrange + i;
2161 else
2162 r = otherrangep[i];
2163 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2164 print_generic_expr (dump_file, r->low, 0);
2165 fprintf (dump_file, ", ");
2166 print_generic_expr (dump_file, r->high, 0);
2167 fprintf (dump_file, "]");
2169 fprintf (dump_file, "\n into ");
2170 print_generic_expr (dump_file, tem, 0);
2171 fprintf (dump_file, "\n");
2174 if (opcode == BIT_IOR_EXPR
2175 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2176 tem = invert_truthvalue_loc (loc, tem);
2178 tem = fold_convert_loc (loc, optype, tem);
2179 if (stmt)
2181 gsi = gsi_for_stmt (stmt);
2182 uid = gimple_uid (stmt);
2184 else
2186 gsi = gsi_none ();
2187 uid = 0;
2189 if (stmt == NULL)
2190 gcc_checking_assert (tem == op);
2191 /* In rare cases range->exp can be equal to lhs of stmt.
2192 In that case we have to insert after the stmt rather than before
2193 it. If stmt is a PHI, insert it at the start of the basic block. */
2194 else if (op != range->exp)
2196 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2197 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2198 GSI_SAME_STMT);
2199 gsi_prev (&gsi);
2201 else if (gimple_code (stmt) != GIMPLE_PHI)
2203 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2204 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2205 GSI_CONTINUE_LINKING);
2207 else
2209 gsi = gsi_after_labels (gimple_bb (stmt));
2210 if (!gsi_end_p (gsi))
2211 uid = gimple_uid (gsi_stmt (gsi));
2212 else
2214 gsi = gsi_start_bb (gimple_bb (stmt));
2215 uid = 1;
2216 while (!gsi_end_p (gsi))
2218 uid = gimple_uid (gsi_stmt (gsi));
2219 gsi_next (&gsi);
2222 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2223 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2224 GSI_SAME_STMT);
2225 if (gsi_end_p (gsi))
2226 gsi = gsi_last_bb (gimple_bb (stmt));
2227 else
2228 gsi_prev (&gsi);
2230 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2231 if (gimple_uid (gsi_stmt (gsi)))
2232 break;
2233 else
2234 gimple_set_uid (gsi_stmt (gsi), uid);
2236 oe->op = tem;
2237 range->exp = exp;
2238 range->low = low;
2239 range->high = high;
2240 range->in_p = in_p;
2241 range->strict_overflow_p = false;
2243 for (i = 0; i < count; i++)
2245 if (otherrange)
2246 range = otherrange + i;
2247 else
2248 range = otherrangep[i];
2249 oe = (*ops)[range->idx];
2250 /* Now change all the other range test immediate uses, so that
2251 those tests will be optimized away. */
2252 if (opcode == ERROR_MARK)
2254 if (oe->op)
2255 oe->op = build_int_cst (TREE_TYPE (oe->op),
2256 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2257 else
2258 oe->op = (oe->rank == BIT_IOR_EXPR
2259 ? boolean_false_node : boolean_true_node);
2261 else
2262 oe->op = error_mark_node;
2263 range->exp = NULL_TREE;
2265 return true;
2268 /* Optimize X == CST1 || X == CST2
2269 if popcount (CST1 ^ CST2) == 1 into
2270 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2271 Similarly for ranges. E.g.
2272 X != 2 && X != 3 && X != 10 && X != 11
2273 will be transformed by the previous optimization into
2274 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2275 and this loop can transform that into
2276 !(((X & ~8) - 2U) <= 1U). */
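/* As a concrete sketch of the single-pair case (values chosen here purely
   for illustration): with CST1 == 43 and CST2 == 47 we have
   CST1 ^ CST2 == 4, a power of two, so

     int before (unsigned x) { return x == 43 || x == 47; }
     int after (unsigned x) { return (x & ~4U) == (43U & ~4U); }

   compute the same value for every X, because the two constants differ
   only in the single bit that the mask clears.  */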
2278 static bool
2279 optimize_range_tests_xor (enum tree_code opcode, tree type,
2280 tree lowi, tree lowj, tree highi, tree highj,
2281 vec<operand_entry *> *ops,
2282 struct range_entry *rangei,
2283 struct range_entry *rangej)
2285 tree lowxor, highxor, tem, exp;
2286 /* Check lowi ^ lowj == highi ^ highj and
2287 popcount (lowi ^ lowj) == 1. */
2288 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2289 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2290 return false;
2291 if (!integer_pow2p (lowxor))
2292 return false;
2293 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2294 if (!tree_int_cst_equal (lowxor, highxor))
2295 return false;
2297 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2298 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2299 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2300 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2301 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2302 NULL, rangei->in_p, lowj, highj,
2303 rangei->strict_overflow_p
2304 || rangej->strict_overflow_p))
2305 return true;
2306 return false;
2309 /* Optimize X == CST1 || X == CST2
2310 if popcount (CST2 - CST1) == 1 into
2311 ((X - CST1) & ~(CST2 - CST1)) == 0.
2312 Similarly for ranges. E.g.
2313 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2314 || X == 75 || X == 45
2315 will be transformed by the previous optimization into
2316 (X - 43U) <= 3U || (X - 75U) <= 3U
2317 and this loop can transform that into
2318 ((X - 43U) & ~(75U - 43U)) <= 3U. */
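/* To make the arithmetic in the comment above concrete: in the second
   example lowi == 43, lowj == 75, highi - lowi == highj - lowj == 3,
   and lowj - lowi == 32 is a power of two, so

     int before (unsigned x) { return (x - 43U) <= 3U || (x - 75U) <= 3U; }
     int after (unsigned x) { return ((x - 43U) & ~32U) <= 3U; }

   agree for every X: subtracting 43 maps the two ranges to [0, 3] and
   [32, 35], which differ only in bit 5, the bit the mask clears.  */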
2319 static bool
2320 optimize_range_tests_diff (enum tree_code opcode, tree type,
2321 tree lowi, tree lowj, tree highi, tree highj,
2322 vec<operand_entry *> *ops,
2323 struct range_entry *rangei,
2324 struct range_entry *rangej)
2326 tree tem1, tem2, mask;
2327 /* Check highi - lowi == highj - lowj. */
2328 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2329 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2330 return false;
2331 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2332 if (!tree_int_cst_equal (tem1, tem2))
2333 return false;
2334 /* Check popcount (lowj - lowi) == 1. */
2335 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2336 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2337 return false;
2338 if (!integer_pow2p (tem1))
2339 return false;
2341 type = unsigned_type_for (type);
2342 tem1 = fold_convert (type, tem1);
2343 tem2 = fold_convert (type, tem2);
2344 lowi = fold_convert (type, lowi);
2345 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2346 tem1 = fold_binary (MINUS_EXPR, type,
2347 fold_convert (type, rangei->exp), lowi);
2348 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2349 lowj = build_int_cst (type, 0);
2350 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2351 NULL, rangei->in_p, lowj, tem2,
2352 rangei->strict_overflow_p
2353 || rangej->strict_overflow_p))
2354 return true;
2355 return false;
2358 /* Perform the checks common to optimize_range_tests_xor and
2359 optimize_range_tests_diff.
2360 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2361 Else it calls optimize_range_tests_diff. */
2363 static bool
2364 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2365 bool optimize_xor, vec<operand_entry *> *ops,
2366 struct range_entry *ranges)
2368 int i, j;
2369 bool any_changes = false;
2370 for (i = first; i < length; i++)
2372 tree lowi, highi, lowj, highj, type, tem;
2374 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2375 continue;
2376 type = TREE_TYPE (ranges[i].exp);
2377 if (!INTEGRAL_TYPE_P (type))
2378 continue;
2379 lowi = ranges[i].low;
2380 if (lowi == NULL_TREE)
2381 lowi = TYPE_MIN_VALUE (type);
2382 highi = ranges[i].high;
2383 if (highi == NULL_TREE)
2384 continue;
2385 for (j = i + 1; j < length && j < i + 64; j++)
2387 bool changes;
2388 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2389 continue;
2390 lowj = ranges[j].low;
2391 if (lowj == NULL_TREE)
2392 continue;
2393 highj = ranges[j].high;
2394 if (highj == NULL_TREE)
2395 highj = TYPE_MAX_VALUE (type);
2396 /* Check lowj > highi. */
2397 tem = fold_binary (GT_EXPR, boolean_type_node,
2398 lowj, highi);
2399 if (tem == NULL_TREE || !integer_onep (tem))
2400 continue;
2401 if (optimize_xor)
2402 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2403 highi, highj, ops,
2404 ranges + i, ranges + j);
2405 else
2406 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2407 highi, highj, ops,
2408 ranges + i, ranges + j);
2409 if (changes)
2411 any_changes = true;
2412 break;
2416 return any_changes;
2419 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2420 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2421 EXP on success, NULL otherwise. */
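/* For example, with a 64-bit PREC, TOTALLOW == 43, LOW == 75 and
   HIGH == 78 (the values from the example further below), the initial
   mask is 0xf (HIGH - LOW + 1 low bits) and is then shifted left by
   LOW - TOTALLOW == 32, giving 0xf00000000: the bits that a
   "1 << (X - 43)" style test needs to check for this subrange.  */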
2423 static tree
2424 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2425 wide_int *mask, tree *totallowp)
2427 tree tem = int_const_binop (MINUS_EXPR, high, low);
2428 if (tem == NULL_TREE
2429 || TREE_CODE (tem) != INTEGER_CST
2430 || TREE_OVERFLOW (tem)
2431 || tree_int_cst_sgn (tem) == -1
2432 || compare_tree_int (tem, prec) != -1)
2433 return NULL_TREE;
2435 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2436 *mask = wi::shifted_mask (0, max, false, prec);
2437 if (TREE_CODE (exp) == BIT_AND_EXPR
2438 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2440 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2441 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2442 if (wi::popcount (msk) == 1
2443 && wi::ltu_p (msk, prec - max))
2445 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2446 max += msk.to_uhwi ();
2447 exp = TREE_OPERAND (exp, 0);
2448 if (integer_zerop (low)
2449 && TREE_CODE (exp) == PLUS_EXPR
2450 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2452 tree ret = TREE_OPERAND (exp, 0);
2453 STRIP_NOPS (ret);
2454 widest_int bias
2455 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2456 TYPE_PRECISION (TREE_TYPE (low))));
2457 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2458 if (totallowp)
2460 *totallowp = tbias;
2461 return ret;
2463 else if (!tree_int_cst_lt (totallow, tbias))
2464 return NULL_TREE;
2465 bias = wi::to_widest (tbias);
2466 bias -= wi::to_widest (totallow);
2467 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2469 *mask = wi::lshift (*mask, bias);
2470 return ret;
2475 if (totallowp)
2476 return exp;
2477 if (!tree_int_cst_lt (totallow, low))
2478 return exp;
2479 tem = int_const_binop (MINUS_EXPR, low, totallow);
2480 if (tem == NULL_TREE
2481 || TREE_CODE (tem) != INTEGER_CST
2482 || TREE_OVERFLOW (tem)
2483 || compare_tree_int (tem, prec - max) == 1)
2484 return NULL_TREE;
2486 *mask = wi::lshift (*mask, wi::to_widest (tem));
2487 return exp;
2490 /* Attempt to optimize small range tests using bit test.
2491 E.g.
2492 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2493 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2494 has been optimized by earlier optimizations into:
2495 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2496 As all the 43 through 82 range is less than 64 numbers,
2497 for 64-bit word targets optimize that into:
2498 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
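/* In the example above the constant 0x8F0000004FULL simply has one bit
   set per tested constant, at position value - 43: bits 0-3 for 43..46,
   bit 6 for 49, bits 32-35 for 75..78 and bit 39 for 82, which is why a
   single shift, AND and compare can replace the whole chain of tests.  */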
2500 static bool
2501 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2502 vec<operand_entry *> *ops,
2503 struct range_entry *ranges)
2505 int i, j;
2506 bool any_changes = false;
2507 int prec = GET_MODE_BITSIZE (word_mode);
2508 auto_vec<struct range_entry *, 64> candidates;
2510 for (i = first; i < length - 2; i++)
2512 tree lowi, highi, lowj, highj, type;
2514 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2515 continue;
2516 type = TREE_TYPE (ranges[i].exp);
2517 if (!INTEGRAL_TYPE_P (type))
2518 continue;
2519 lowi = ranges[i].low;
2520 if (lowi == NULL_TREE)
2521 lowi = TYPE_MIN_VALUE (type);
2522 highi = ranges[i].high;
2523 if (highi == NULL_TREE)
2524 continue;
2525 wide_int mask;
2526 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2527 highi, &mask, &lowi);
2528 if (exp == NULL_TREE)
2529 continue;
2530 bool strict_overflow_p = ranges[i].strict_overflow_p;
2531 candidates.truncate (0);
2532 int end = MIN (i + 64, length);
2533 for (j = i + 1; j < end; j++)
2535 tree exp2;
2536 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2537 continue;
2538 if (ranges[j].exp == exp)
2540 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2542 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2543 if (exp2 == exp)
2545 else if (TREE_CODE (exp2) == PLUS_EXPR)
2547 exp2 = TREE_OPERAND (exp2, 0);
2548 STRIP_NOPS (exp2);
2549 if (exp2 != exp)
2550 continue;
2552 else
2553 continue;
2555 else
2556 continue;
2557 lowj = ranges[j].low;
2558 if (lowj == NULL_TREE)
2559 continue;
2560 highj = ranges[j].high;
2561 if (highj == NULL_TREE)
2562 highj = TYPE_MAX_VALUE (type);
2563 wide_int mask2;
2564 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2565 highj, &mask2, NULL);
2566 if (exp2 != exp)
2567 continue;
2568 mask |= mask2;
2569 strict_overflow_p |= ranges[j].strict_overflow_p;
2570 candidates.safe_push (&ranges[j]);
2573 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2574 if (candidates.length () >= 2)
2576 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2577 wi::to_widest (lowi)
2578 + prec - 1 - wi::clz (mask));
2579 operand_entry *oe = (*ops)[ranges[i].idx];
2580 tree op = oe->op;
2581 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2582 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2583 location_t loc = gimple_location (stmt);
2584 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2586 /* See if it isn't cheaper to pretend the minimum value of the
2587 range is 0, if the maximum value is small enough.
2588 We can then avoid the subtraction of the minimum value, but the
2589 mask constant could perhaps be more expensive. */
2590 if (compare_tree_int (lowi, 0) > 0
2591 && compare_tree_int (high, prec) < 0)
2593 int cost_diff;
2594 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2595 rtx reg = gen_raw_REG (word_mode, 10000);
2596 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2597 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2598 GEN_INT (-m)), speed_p);
2599 rtx r = immed_wide_int_const (mask, word_mode);
2600 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2601 word_mode, speed_p);
2602 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2603 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2604 word_mode, speed_p);
2605 if (cost_diff > 0)
2607 mask = wi::lshift (mask, m);
2608 lowi = build_zero_cst (TREE_TYPE (lowi));
2612 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613 false, lowi, high);
2614 if (tem == NULL_TREE || is_gimple_val (tem))
2615 continue;
2616 tree etype = unsigned_type_for (TREE_TYPE (exp));
2617 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2618 fold_convert_loc (loc, etype, exp),
2619 fold_convert_loc (loc, etype, lowi));
2620 exp = fold_convert_loc (loc, integer_type_node, exp);
2621 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2622 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2623 build_int_cst (word_type, 1), exp);
2624 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2625 wide_int_to_tree (word_type, mask));
2626 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2627 build_zero_cst (word_type));
2628 if (is_gimple_val (exp))
2629 continue;
2631 /* The shift might have undefined behavior if TEM is true,
2632 but reassociate_bb isn't prepared to have basic blocks
2633 split when it is running. So, temporarily emit code
2634 with BIT_IOR_EXPR instead of &&, and fix it up in
2635 branch_fixup. */
2636 gimple_seq seq;
2637 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2638 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2639 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2640 gimple_seq seq2;
2641 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2642 gimple_seq_add_seq_without_update (&seq, seq2);
2643 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2644 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2645 gimple *g = gimple_build_assign (make_ssa_name (optype),
2646 BIT_IOR_EXPR, tem, exp);
2647 gimple_set_location (g, loc);
2648 gimple_seq_add_stmt_without_update (&seq, g);
2649 exp = gimple_assign_lhs (g);
2650 tree val = build_zero_cst (optype);
2651 if (update_range_test (&ranges[i], NULL, candidates.address (),
2652 candidates.length (), opcode, ops, exp,
2653 seq, false, val, val, strict_overflow_p))
2655 any_changes = true;
2656 reassoc_branch_fixups.safe_push (tem);
2658 else
2659 gimple_seq_discard (seq);
2662 return any_changes;
2665 /* Optimize range tests, similarly how fold_range_test optimizes
2666 it on trees. The tree code for the binary
2667 operation between all the operands is OPCODE.
2668 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2669 maybe_optimize_range_tests for inter-bb range optimization.
2670 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2671 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2672 the actual opcode. */
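/* As an illustration of the overall effect (a hypothetical input, not
   taken from a testcase): a chain such as

     if (a == 3 || a == 4 || (a >= 7 && a <= 9))

   is represented here as three range_entries +[3, 3], +[4, 4] and
   +[7, 9] for the same SSA_NAME; merge_ranges can combine adjacent ones
   and update_range_test can rewrite them into fewer comparisons, e.g.
   "(unsigned) (a - 3) <= 1 || (unsigned) (a - 7) <= 2".  */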
2674 static bool
2675 optimize_range_tests (enum tree_code opcode,
2676 vec<operand_entry *> *ops)
2678 unsigned int length = ops->length (), i, j, first;
2679 operand_entry *oe;
2680 struct range_entry *ranges;
2681 bool any_changes = false;
2683 if (length == 1)
2684 return false;
2686 ranges = XNEWVEC (struct range_entry, length);
2687 for (i = 0; i < length; i++)
2689 oe = (*ops)[i];
2690 ranges[i].idx = i;
2691 init_range_entry (ranges + i, oe->op,
2692 oe->op
2693 ? NULL
2694 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2695 /* For | invert it now, we will invert it again before emitting
2696 the optimized expression. */
2697 if (opcode == BIT_IOR_EXPR
2698 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2699 ranges[i].in_p = !ranges[i].in_p;
2702 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2703 for (i = 0; i < length; i++)
2704 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2705 break;
2707 /* Try to merge ranges. */
2708 for (first = i; i < length; i++)
2710 tree low = ranges[i].low;
2711 tree high = ranges[i].high;
2712 int in_p = ranges[i].in_p;
2713 bool strict_overflow_p = ranges[i].strict_overflow_p;
2714 int update_fail_count = 0;
2716 for (j = i + 1; j < length; j++)
2718 if (ranges[i].exp != ranges[j].exp)
2719 break;
2720 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2721 ranges[j].in_p, ranges[j].low, ranges[j].high))
2722 break;
2723 strict_overflow_p |= ranges[j].strict_overflow_p;
2726 if (j == i + 1)
2727 continue;
2729 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2730 opcode, ops, ranges[i].exp, NULL, in_p,
2731 low, high, strict_overflow_p))
2733 i = j - 1;
2734 any_changes = true;
2736 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2737 while update_range_test would fail. */
2738 else if (update_fail_count == 64)
2739 i = j - 1;
2740 else
2741 ++update_fail_count;
2744 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2745 ops, ranges);
2747 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2748 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2749 ops, ranges);
2750 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2751 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2752 ops, ranges);
2754 if (any_changes && opcode != ERROR_MARK)
2756 j = 0;
2757 FOR_EACH_VEC_ELT (*ops, i, oe)
2759 if (oe->op == error_mark_node)
2760 continue;
2761 else if (i != j)
2762 (*ops)[j] = oe;
2763 j++;
2765 ops->truncate (j);
2768 XDELETEVEC (ranges);
2769 return any_changes;
2772 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2773 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2774 otherwise the comparison code. */
2776 static tree_code
2777 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2779 if (TREE_CODE (var) != SSA_NAME)
2780 return ERROR_MARK;
2782 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2783 if (stmt == NULL)
2784 return ERROR_MARK;
2786 /* ??? If we start creating more COND_EXPR, we could perform
2787 this same optimization with them. For now, simplify. */
2788 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2789 return ERROR_MARK;
2791 tree cond = gimple_assign_rhs1 (stmt);
2792 tree_code cmp = TREE_CODE (cond);
2793 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2794 return ERROR_MARK;
2796 /* ??? For now, allow only canonical true and false result vectors.
2797 We could expand this to other constants should the need arise,
2798 but at the moment we don't create them. */
2799 tree t = gimple_assign_rhs2 (stmt);
2800 tree f = gimple_assign_rhs3 (stmt);
2801 bool inv;
2802 if (integer_all_onesp (t))
2803 inv = false;
2804 else if (integer_all_onesp (f))
2806 cmp = invert_tree_comparison (cmp, false);
2807 inv = true;
2809 else
2810 return ERROR_MARK;
2811 if (!integer_zerop (f))
2812 return ERROR_MARK;
2814 /* Success! */
2815 if (rets)
2816 *rets = stmt;
2817 if (reti)
2818 *reti = inv;
2819 return cmp;
2822 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2823 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
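/* E.g. (a sketch of the intent, using scalar-looking syntax for the
   vector lanes): the combination

     (x < y ? -1 : 0) & (x <= y ? -1 : 0)

   feeds two VEC_COND_EXPRs into a BIT_AND_EXPR; maybe_fold_and_comparisons
   can fold the pair of comparisons to the single x < y, so only one
   VEC_COND_EXPR remains and the other operand is dropped from OPS.  */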
2825 static bool
2826 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2828 unsigned int length = ops->length (), i, j;
2829 bool any_changes = false;
2831 if (length == 1)
2832 return false;
2834 for (i = 0; i < length; ++i)
2836 tree elt0 = (*ops)[i]->op;
2838 gassign *stmt0;
2839 bool invert;
2840 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2841 if (cmp0 == ERROR_MARK)
2842 continue;
2844 for (j = i + 1; j < length; ++j)
2846 tree &elt1 = (*ops)[j]->op;
2848 gassign *stmt1;
2849 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2850 if (cmp1 == ERROR_MARK)
2851 continue;
2853 tree cond0 = gimple_assign_rhs1 (stmt0);
2854 tree x0 = TREE_OPERAND (cond0, 0);
2855 tree y0 = TREE_OPERAND (cond0, 1);
2857 tree cond1 = gimple_assign_rhs1 (stmt1);
2858 tree x1 = TREE_OPERAND (cond1, 0);
2859 tree y1 = TREE_OPERAND (cond1, 1);
2861 tree comb;
2862 if (opcode == BIT_AND_EXPR)
2863 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2864 else if (opcode == BIT_IOR_EXPR)
2865 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2866 else
2867 gcc_unreachable ();
2868 if (comb == NULL)
2869 continue;
2871 /* Success! */
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Transforming ");
2875 print_generic_expr (dump_file, cond0, 0);
2876 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2877 print_generic_expr (dump_file, cond1, 0);
2878 fprintf (dump_file, " into ");
2879 print_generic_expr (dump_file, comb, 0);
2880 fputc ('\n', dump_file);
2883 gimple_assign_set_rhs1 (stmt0, comb);
2884 if (invert)
2885 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2886 *gimple_assign_rhs3_ptr (stmt0));
2887 update_stmt (stmt0);
2889 elt1 = error_mark_node;
2890 any_changes = true;
2894 if (any_changes)
2896 operand_entry *oe;
2897 j = 0;
2898 FOR_EACH_VEC_ELT (*ops, i, oe)
2900 if (oe->op == error_mark_node)
2901 continue;
2902 else if (i != j)
2903 (*ops)[j] = oe;
2904 j++;
2906 ops->truncate (j);
2909 return any_changes;
2912 /* Return true if STMT is a cast like:
2913 <bb N>:
2915 _123 = (int) _234;
2917 <bb M>:
2918 # _345 = PHI <_123(N), 1(...), 1(...)>
2919 where _234 has bool type, _123 has single use and
2920 bb N has a single successor M. This is commonly used in
2921 the last block of a range test. */
2923 static bool
2924 final_range_test_p (gimple *stmt)
2926 basic_block bb, rhs_bb;
2927 edge e;
2928 tree lhs, rhs;
2929 use_operand_p use_p;
2930 gimple *use_stmt;
2932 if (!gimple_assign_cast_p (stmt))
2933 return false;
2934 bb = gimple_bb (stmt);
2935 if (!single_succ_p (bb))
2936 return false;
2937 e = single_succ_edge (bb);
2938 if (e->flags & EDGE_COMPLEX)
2939 return false;
2941 lhs = gimple_assign_lhs (stmt);
2942 rhs = gimple_assign_rhs1 (stmt);
2943 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2944 || TREE_CODE (rhs) != SSA_NAME
2945 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2946 return false;
2948 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2949 if (!single_imm_use (lhs, &use_p, &use_stmt))
2950 return false;
2952 if (gimple_code (use_stmt) != GIMPLE_PHI
2953 || gimple_bb (use_stmt) != e->dest)
2954 return false;
2956 /* And that the rhs is defined in the same loop. */
2957 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2958 if (rhs_bb == NULL
2959 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2960 return false;
2962 return true;
2965 /* Return true if BB is suitable basic block for inter-bb range test
2966 optimization. If BACKWARD is true, BB should be the only predecessor
2967 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2968 or is compared against to find a common basic block to which all conditions
2969 branch if true resp. false. If BACKWARD is false, TEST_BB should
2970 be the only predecessor of BB. */
2972 static bool
2973 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2974 bool backward)
2976 edge_iterator ei, ei2;
2977 edge e, e2;
2978 gimple *stmt;
2979 gphi_iterator gsi;
2980 bool other_edge_seen = false;
2981 bool is_cond;
2983 if (test_bb == bb)
2984 return false;
2985 /* Check last stmt first. */
2986 stmt = last_stmt (bb);
2987 if (stmt == NULL
2988 || (gimple_code (stmt) != GIMPLE_COND
2989 && (backward || !final_range_test_p (stmt)))
2990 || gimple_visited_p (stmt)
2991 || stmt_could_throw_p (stmt)
2992 || *other_bb == bb)
2993 return false;
2994 is_cond = gimple_code (stmt) == GIMPLE_COND;
2995 if (is_cond)
2997 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2998 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2999 to *OTHER_BB (if not set yet, try to find it out). */
3000 if (EDGE_COUNT (bb->succs) != 2)
3001 return false;
3002 FOR_EACH_EDGE (e, ei, bb->succs)
3004 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3005 return false;
3006 if (e->dest == test_bb)
3008 if (backward)
3009 continue;
3010 else
3011 return false;
3013 if (e->dest == bb)
3014 return false;
3015 if (*other_bb == NULL)
3017 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3018 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3019 return false;
3020 else if (e->dest == e2->dest)
3021 *other_bb = e->dest;
3022 if (*other_bb == NULL)
3023 return false;
3025 if (e->dest == *other_bb)
3026 other_edge_seen = true;
3027 else if (backward)
3028 return false;
3030 if (*other_bb == NULL || !other_edge_seen)
3031 return false;
3033 else if (single_succ (bb) != *other_bb)
3034 return false;
3036 /* Now check all PHIs of *OTHER_BB. */
3037 e = find_edge (bb, *other_bb);
3038 e2 = find_edge (test_bb, *other_bb);
3039 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3041 gphi *phi = gsi.phi ();
3042 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3043 corresponding to BB and TEST_BB predecessor must be the same. */
3044 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3045 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3047 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3048 one of the PHIs should have the lhs of the last stmt in
3049 that block as PHI arg and that PHI should have 0 or 1
3050 corresponding to it in all other range test basic blocks
3051 considered. */
3052 if (!is_cond)
3054 if (gimple_phi_arg_def (phi, e->dest_idx)
3055 == gimple_assign_lhs (stmt)
3056 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3057 || integer_onep (gimple_phi_arg_def (phi,
3058 e2->dest_idx))))
3059 continue;
3061 else
3063 gimple *test_last = last_stmt (test_bb);
3064 if (gimple_code (test_last) != GIMPLE_COND
3065 && gimple_phi_arg_def (phi, e2->dest_idx)
3066 == gimple_assign_lhs (test_last)
3067 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3068 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3069 continue;
3072 return false;
3075 return true;
3078 /* Return true if BB doesn't have side-effects that would disallow
3079 range test optimization, all SSA_NAMEs set in the bb are consumed
3080 in the bb and there are no PHIs. */
3082 static bool
3083 no_side_effect_bb (basic_block bb)
3085 gimple_stmt_iterator gsi;
3086 gimple *last;
3088 if (!gimple_seq_empty_p (phi_nodes (bb)))
3089 return false;
3090 last = last_stmt (bb);
3091 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3093 gimple *stmt = gsi_stmt (gsi);
3094 tree lhs;
3095 imm_use_iterator imm_iter;
3096 use_operand_p use_p;
3098 if (is_gimple_debug (stmt))
3099 continue;
3100 if (gimple_has_side_effects (stmt))
3101 return false;
3102 if (stmt == last)
3103 return true;
3104 if (!is_gimple_assign (stmt))
3105 return false;
3106 lhs = gimple_assign_lhs (stmt);
3107 if (TREE_CODE (lhs) != SSA_NAME)
3108 return false;
3109 if (gimple_assign_rhs_could_trap_p (stmt))
3110 return false;
3111 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3113 gimple *use_stmt = USE_STMT (use_p);
3114 if (is_gimple_debug (use_stmt))
3115 continue;
3116 if (gimple_bb (use_stmt) != bb)
3117 return false;
3120 return false;
3123 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3124 return true and fill in *OPS recursively. */
3126 static bool
3127 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3128 struct loop *loop)
3130 gimple *stmt = SSA_NAME_DEF_STMT (var);
3131 tree rhs[2];
3132 int i;
3134 if (!is_reassociable_op (stmt, code, loop))
3135 return false;
3137 rhs[0] = gimple_assign_rhs1 (stmt);
3138 rhs[1] = gimple_assign_rhs2 (stmt);
3139 gimple_set_visited (stmt, true);
3140 for (i = 0; i < 2; i++)
3141 if (TREE_CODE (rhs[i]) == SSA_NAME
3142 && !get_ops (rhs[i], code, ops, loop)
3143 && has_single_use (rhs[i]))
3145 operand_entry *oe = operand_entry_pool.allocate ();
3147 oe->op = rhs[i];
3148 oe->rank = code;
3149 oe->id = 0;
3150 oe->count = 1;
3151 ops->safe_push (oe);
3153 return true;
3156 /* Find the ops that were added by get_ops starting from VAR, see if
3157 they were changed during update_range_test and if yes, create new
3158 stmts. */
3160 static tree
3161 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3162 unsigned int *pidx, struct loop *loop)
3164 gimple *stmt = SSA_NAME_DEF_STMT (var);
3165 tree rhs[4];
3166 int i;
3168 if (!is_reassociable_op (stmt, code, loop))
3169 return NULL;
3171 rhs[0] = gimple_assign_rhs1 (stmt);
3172 rhs[1] = gimple_assign_rhs2 (stmt);
3173 rhs[2] = rhs[0];
3174 rhs[3] = rhs[1];
3175 for (i = 0; i < 2; i++)
3176 if (TREE_CODE (rhs[i]) == SSA_NAME)
3178 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3179 if (rhs[2 + i] == NULL_TREE)
3181 if (has_single_use (rhs[i]))
3182 rhs[2 + i] = ops[(*pidx)++]->op;
3183 else
3184 rhs[2 + i] = rhs[i];
3187 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3188 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3190 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3191 var = make_ssa_name (TREE_TYPE (var));
3192 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3193 rhs[2], rhs[3]);
3194 gimple_set_uid (g, gimple_uid (stmt));
3195 gimple_set_visited (g, true);
3196 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3198 return var;
3201 /* Structure to track the initial value passed to get_ops and
3202 the range in the ops vector for each basic block. */
3204 struct inter_bb_range_test_entry
3206 tree op;
3207 unsigned int first_idx, last_idx;
3210 /* Inter-bb range test optimization.
3212 Returns TRUE if a gimple conditional is optimized to a true/false,
3213 otherwise return FALSE.
3215 This indicates to the caller that it should run a CFG cleanup pass
3216 once reassociation is completed. */
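/* The typical shape handled here (illustrative source, not a specific
   testcase) is short-circuit code such as

     if (a == 1 || a == 3 || a == 5)
       foo ();

   where each comparison ends up in its own basic block ending in a
   GIMPLE_COND, all of them branching to the same join block.  The
   conditions are collected into OPS, optimize_range_tests rewrites
   them, and the now-redundant GIMPLE_CONDs are turned into constant
   true/false conditions, hence the need for a later CFG cleanup.  */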
3218 static bool
3219 maybe_optimize_range_tests (gimple *stmt)
3221 basic_block first_bb = gimple_bb (stmt);
3222 basic_block last_bb = first_bb;
3223 basic_block other_bb = NULL;
3224 basic_block bb;
3225 edge_iterator ei;
3226 edge e;
3227 auto_vec<operand_entry *> ops;
3228 auto_vec<inter_bb_range_test_entry> bbinfo;
3229 bool any_changes = false;
3230 bool cfg_cleanup_needed = false;
3232 /* Consider only basic blocks that end with GIMPLE_COND or
3233 a cast statement satisfying final_range_test_p. All
3234 but the last bb in the first_bb .. last_bb range
3235 should end with GIMPLE_COND. */
3236 if (gimple_code (stmt) == GIMPLE_COND)
3238 if (EDGE_COUNT (first_bb->succs) != 2)
3239 return cfg_cleanup_needed;
3241 else if (final_range_test_p (stmt))
3242 other_bb = single_succ (first_bb);
3243 else
3244 return cfg_cleanup_needed;
3246 if (stmt_could_throw_p (stmt))
3247 return cfg_cleanup_needed;
3249 /* As relative ordering of post-dominator sons isn't fixed,
3250 maybe_optimize_range_tests can be called first on any
3251 bb in the range we want to optimize. So, start searching
3252 backwards, if first_bb can be set to a predecessor. */
3253 while (single_pred_p (first_bb))
3255 basic_block pred_bb = single_pred (first_bb);
3256 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3257 break;
3258 if (!no_side_effect_bb (first_bb))
3259 break;
3260 first_bb = pred_bb;
3262 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3263 Before starting forward search in last_bb successors, find
3264 out the other_bb. */
3265 if (first_bb == last_bb)
3267 other_bb = NULL;
3268 /* As non-GIMPLE_COND last stmt always terminates the range,
3269 if forward search didn't discover anything, just give up. */
3270 if (gimple_code (stmt) != GIMPLE_COND)
3271 return cfg_cleanup_needed;
3272 /* Look at both successors. Either it ends with a GIMPLE_COND
3273 and satisfies suitable_cond_bb, or ends with a cast and
3274 other_bb is that cast's successor. */
3275 FOR_EACH_EDGE (e, ei, first_bb->succs)
3276 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3277 || e->dest == first_bb)
3278 return cfg_cleanup_needed;
3279 else if (single_pred_p (e->dest))
3281 stmt = last_stmt (e->dest);
3282 if (stmt
3283 && gimple_code (stmt) == GIMPLE_COND
3284 && EDGE_COUNT (e->dest->succs) == 2)
3286 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3287 break;
3288 else
3289 other_bb = NULL;
3291 else if (stmt
3292 && final_range_test_p (stmt)
3293 && find_edge (first_bb, single_succ (e->dest)))
3295 other_bb = single_succ (e->dest);
3296 if (other_bb == first_bb)
3297 other_bb = NULL;
3300 if (other_bb == NULL)
3301 return cfg_cleanup_needed;
3303 /* Now do the forward search, moving last_bb to successor bbs
3304 that aren't other_bb. */
3305 while (EDGE_COUNT (last_bb->succs) == 2)
3307 FOR_EACH_EDGE (e, ei, last_bb->succs)
3308 if (e->dest != other_bb)
3309 break;
3310 if (e == NULL)
3311 break;
3312 if (!single_pred_p (e->dest))
3313 break;
3314 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3315 break;
3316 if (!no_side_effect_bb (e->dest))
3317 break;
3318 last_bb = e->dest;
3320 if (first_bb == last_bb)
3321 return cfg_cleanup_needed;
3322 /* Here basic blocks first_bb through last_bb's predecessor
3323 end with GIMPLE_COND, all of them have one of the edges to
3324 other_bb and another to another block in the range,
3325 all blocks except first_bb don't have side-effects and
3326 last_bb ends with either GIMPLE_COND, or cast satisfying
3327 final_range_test_p. */
3328 for (bb = last_bb; ; bb = single_pred (bb))
3330 enum tree_code code;
3331 tree lhs, rhs;
3332 inter_bb_range_test_entry bb_ent;
3334 bb_ent.op = NULL_TREE;
3335 bb_ent.first_idx = ops.length ();
3336 bb_ent.last_idx = bb_ent.first_idx;
3337 e = find_edge (bb, other_bb);
3338 stmt = last_stmt (bb);
3339 gimple_set_visited (stmt, true);
3340 if (gimple_code (stmt) != GIMPLE_COND)
3342 use_operand_p use_p;
3343 gimple *phi;
3344 edge e2;
3345 unsigned int d;
3347 lhs = gimple_assign_lhs (stmt);
3348 rhs = gimple_assign_rhs1 (stmt);
3349 gcc_assert (bb == last_bb);
3351 /* stmt is
3352 _123 = (int) _234;
3354 followed by:
3355 <bb M>:
3356 # _345 = PHI <_123(N), 1(...), 1(...)>
3358 or 0 instead of 1. If it is 0, the _234
3359 range test is anded together with all the
3360 other range tests, if it is 1, it is ored with
3361 them. */
3362 single_imm_use (lhs, &use_p, &phi);
3363 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3364 e2 = find_edge (first_bb, other_bb);
3365 d = e2->dest_idx;
3366 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3367 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3368 code = BIT_AND_EXPR;
3369 else
3371 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3372 code = BIT_IOR_EXPR;
3375 /* If _234 SSA_NAME_DEF_STMT is
3376 _234 = _567 | _789;
3377 (or &, corresponding to 1/0 in the phi arguments),
3378 push into ops the individual range test arguments
3379 of the bitwise or resp. and, recursively. */
3380 if (!get_ops (rhs, code, &ops,
3381 loop_containing_stmt (stmt))
3382 && has_single_use (rhs))
3384 /* Otherwise, push the _234 range test itself. */
3385 operand_entry *oe = operand_entry_pool.allocate ();
3387 oe->op = rhs;
3388 oe->rank = code;
3389 oe->id = 0;
3390 oe->count = 1;
3391 ops.safe_push (oe);
3392 bb_ent.last_idx++;
3394 else
3395 bb_ent.last_idx = ops.length ();
3396 bb_ent.op = rhs;
3397 bbinfo.safe_push (bb_ent);
3398 continue;
3400 /* Otherwise stmt is GIMPLE_COND. */
3401 code = gimple_cond_code (stmt);
3402 lhs = gimple_cond_lhs (stmt);
3403 rhs = gimple_cond_rhs (stmt);
3404 if (TREE_CODE (lhs) == SSA_NAME
3405 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3406 && ((code != EQ_EXPR && code != NE_EXPR)
3407 || rhs != boolean_false_node
3408 /* Either push into ops the individual bitwise
3409 or resp. and operands, depending on which
3410 edge is other_bb. */
3411 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3412 ^ (code == EQ_EXPR))
3413 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3414 loop_containing_stmt (stmt))))
3416 /* Or push the GIMPLE_COND stmt itself. */
3417 operand_entry *oe = operand_entry_pool.allocate ();
3419 oe->op = NULL;
3420 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3421 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3422 /* oe->op = NULL signals that there is no SSA_NAME
3423 for the range test, and oe->id instead is the
3424 basic block number at whose end the GIMPLE_COND
3425 is. */
3426 oe->id = bb->index;
3427 oe->count = 1;
3428 ops.safe_push (oe);
3429 bb_ent.op = NULL;
3430 bb_ent.last_idx++;
3432 else if (ops.length () > bb_ent.first_idx)
3434 bb_ent.op = lhs;
3435 bb_ent.last_idx = ops.length ();
3437 bbinfo.safe_push (bb_ent);
3438 if (bb == first_bb)
3439 break;
3441 if (ops.length () > 1)
3442 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3443 if (any_changes)
3445 unsigned int idx, max_idx = 0;
3446 /* update_ops relies on the has_single_use predicates returning the
3447 same values as they did during get_ops earlier. Additionally it
3448 never removes statements, only adds new ones and it should walk
3449 from the single imm use and check the predicate already before
3450 making those changes.
3451 On the other side, the handling of GIMPLE_COND directly can turn
3452 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3453 it needs to be done in a separate loop afterwards. */
3454 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3456 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3457 && bbinfo[idx].op != NULL_TREE)
3459 tree new_op;
3461 max_idx = idx;
3462 stmt = last_stmt (bb);
3463 new_op = update_ops (bbinfo[idx].op,
3464 (enum tree_code)
3465 ops[bbinfo[idx].first_idx]->rank,
3466 ops, &bbinfo[idx].first_idx,
3467 loop_containing_stmt (stmt));
3468 if (new_op == NULL_TREE)
3470 gcc_assert (bb == last_bb);
3471 new_op = ops[bbinfo[idx].first_idx++]->op;
3473 if (bbinfo[idx].op != new_op)
3475 imm_use_iterator iter;
3476 use_operand_p use_p;
3477 gimple *use_stmt, *cast_stmt = NULL;
3479 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3480 if (is_gimple_debug (use_stmt))
3481 continue;
3482 else if (gimple_code (use_stmt) == GIMPLE_COND
3483 || gimple_code (use_stmt) == GIMPLE_PHI)
3484 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3485 SET_USE (use_p, new_op);
3486 else if (gimple_assign_cast_p (use_stmt))
3487 cast_stmt = use_stmt;
3488 else
3489 gcc_unreachable ();
3490 if (cast_stmt)
3492 gcc_assert (bb == last_bb);
3493 tree lhs = gimple_assign_lhs (cast_stmt);
3494 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3495 enum tree_code rhs_code
3496 = gimple_assign_rhs_code (cast_stmt);
3497 gassign *g;
3498 if (is_gimple_min_invariant (new_op))
3500 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3501 g = gimple_build_assign (new_lhs, new_op);
3503 else
3504 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3505 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3506 gimple_set_uid (g, gimple_uid (cast_stmt));
3507 gimple_set_visited (g, true);
3508 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3509 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3510 if (is_gimple_debug (use_stmt))
3511 continue;
3512 else if (gimple_code (use_stmt) == GIMPLE_COND
3513 || gimple_code (use_stmt) == GIMPLE_PHI)
3514 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3515 SET_USE (use_p, new_lhs);
3516 else
3517 gcc_unreachable ();
3521 if (bb == first_bb)
3522 break;
3524 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3526 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3527 && bbinfo[idx].op == NULL_TREE
3528 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3530 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3532 if (idx > max_idx)
3533 max_idx = idx;
3535 /* If we collapse the conditional to a true/false
3536 condition, then bubble that knowledge up to our caller. */
3537 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3539 gimple_cond_make_false (cond_stmt);
3540 cfg_cleanup_needed = true;
3542 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3544 gimple_cond_make_true (cond_stmt);
3545 cfg_cleanup_needed = true;
3547 else
3549 gimple_cond_set_code (cond_stmt, NE_EXPR);
3550 gimple_cond_set_lhs (cond_stmt,
3551 ops[bbinfo[idx].first_idx]->op);
3552 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3554 update_stmt (cond_stmt);
3556 if (bb == first_bb)
3557 break;
3560 /* The above changes could result in basic blocks after the first
3561 modified one, up to and including last_bb, being executed even if
3562 they would not be in the original program. If the value ranges of
3563 assignment lhs' in those bbs were dependent on the conditions
3564 guarding those basic blocks, which now can change, the VRs might
3565 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3566 are only used within the same bb, it should not be a big deal if
3567 we just reset all the VRs in those bbs. See PR68671. */
3568 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3569 reset_flow_sensitive_info_in_bb (bb);
3571 return cfg_cleanup_needed;
3574 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3575 of STMT in its operands. This is also known as a "destructive
3576 update" operation. */
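/* The canonical case is a reduction loop (sketched here in C):

     for (i = 0; i < n; i++)
       sum = sum + x[i];

   In SSA form the addition is sum_2 = x_i + sum_1, where sum_1 is
   defined by a PHI that also takes sum_2 from the loop back edge, so
   is_phi_for_stmt (the addition, sum_1) returns true.  */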
3578 static bool
3579 is_phi_for_stmt (gimple *stmt, tree operand)
3581 gimple *def_stmt;
3582 gphi *def_phi;
3583 tree lhs;
3584 use_operand_p arg_p;
3585 ssa_op_iter i;
3587 if (TREE_CODE (operand) != SSA_NAME)
3588 return false;
3590 lhs = gimple_assign_lhs (stmt);
3592 def_stmt = SSA_NAME_DEF_STMT (operand);
3593 def_phi = dyn_cast <gphi *> (def_stmt);
3594 if (!def_phi)
3595 return false;
3597 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3598 if (lhs == USE_FROM_PTR (arg_p))
3599 return true;
3600 return false;
3603 /* Remove def stmt of VAR if VAR has zero uses and recurse
3604 on rhs1 operand if so. */
3606 static void
3607 remove_visited_stmt_chain (tree var)
3609 gimple *stmt;
3610 gimple_stmt_iterator gsi;
3612 while (1)
3614 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3615 return;
3616 stmt = SSA_NAME_DEF_STMT (var);
3617 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3619 var = gimple_assign_rhs1 (stmt);
3620 gsi = gsi_for_stmt (stmt);
3621 reassoc_remove_stmt (&gsi);
3622 release_defs (stmt);
3624 else
3625 return;
3629 /* This function checks three consecutive operands in
3630 passed operands vector OPS starting from OPINDEX and
3631 swaps two operands if it is profitable for the binary operation
3632 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3634 We pair ops with the same rank if possible.
3636 The alternative we try is to see if STMT is a destructive
3637 update style statement, which is like:
3638 b = phi (a, ...)
3639 a = c + b;
3640 In that case, we want to use the destructive update form to
3641 expose the possible vectorizer sum reduction opportunity.
3642 In that case, the third operand will be the phi node. This
3643 check is not performed if STMT is null.
3645 We could, of course, try to be better as noted above, and do a
3646 lot of work to try to find these opportunities in >3 operand
3647 cases, but it is unlikely to be worth it. */
3649 static void
3650 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3651 unsigned int opindex, gimple *stmt)
3653 operand_entry *oe1, *oe2, *oe3;
3655 oe1 = ops[opindex];
3656 oe2 = ops[opindex + 1];
3657 oe3 = ops[opindex + 2];
3659 if ((oe1->rank == oe2->rank
3660 && oe2->rank != oe3->rank)
3661 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3662 && !is_phi_for_stmt (stmt, oe1->op)
3663 && !is_phi_for_stmt (stmt, oe2->op)))
3665 operand_entry temp = *oe3;
3666 oe3->op = oe1->op;
3667 oe3->rank = oe1->rank;
3668 oe1->op = temp.op;
3669 oe1->rank = temp.rank;
3671 else if ((oe1->rank == oe3->rank
3672 && oe2->rank != oe3->rank)
3673 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3674 && !is_phi_for_stmt (stmt, oe1->op)
3675 && !is_phi_for_stmt (stmt, oe3->op)))
3677 operand_entry temp = *oe2;
3678 oe2->op = oe1->op;
3679 oe2->rank = oe1->rank;
3680 oe1->op = temp.op;
3681 oe1->rank = temp.rank;
3685 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3686 two definitions, otherwise return STMT. */
3688 static inline gimple *
3689 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3691 if (TREE_CODE (rhs1) == SSA_NAME
3692 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3693 stmt = SSA_NAME_DEF_STMT (rhs1);
3694 if (TREE_CODE (rhs2) == SSA_NAME
3695 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3696 stmt = SSA_NAME_DEF_STMT (rhs2);
3697 return stmt;
3700 /* Recursively rewrite our linearized statements so that the operators
3701 match those in OPS[OPINDEX], putting the computation in rank
3702 order. Return new lhs. */
3704 static tree
3705 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3706 vec<operand_entry *> ops, bool changed)
3708 tree rhs1 = gimple_assign_rhs1 (stmt);
3709 tree rhs2 = gimple_assign_rhs2 (stmt);
3710 tree lhs = gimple_assign_lhs (stmt);
3711 operand_entry *oe;
3713 /* The final recursion case for this function is that you have
3714 exactly two operations left.
3715 If we had exactly one op in the entire list to start with, we
3716 would have never called this function, and the tail recursion
3717 rewrites them one at a time. */
3718 if (opindex + 2 == ops.length ())
3720 operand_entry *oe1, *oe2;
3722 oe1 = ops[opindex];
3723 oe2 = ops[opindex + 1];
3725 if (rhs1 != oe1->op || rhs2 != oe2->op)
3727 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3728 unsigned int uid = gimple_uid (stmt);
3730 if (dump_file && (dump_flags & TDF_DETAILS))
3732 fprintf (dump_file, "Transforming ");
3733 print_gimple_stmt (dump_file, stmt, 0, 0);
3736 /* Even when changed is false, reassociation could have e.g. removed
3737 some redundant operations, so unless we are just swapping the
3738 arguments or unless there is no change at all (then we just
3739 return lhs), force creation of a new SSA_NAME. */
3740 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3742 gimple *insert_point
3743 = find_insert_point (stmt, oe1->op, oe2->op);
3744 lhs = make_ssa_name (TREE_TYPE (lhs));
3745 stmt
3746 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3747 oe1->op, oe2->op);
3748 gimple_set_uid (stmt, uid);
3749 gimple_set_visited (stmt, true);
3750 if (insert_point == gsi_stmt (gsi))
3751 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3752 else
3753 insert_stmt_after (stmt, insert_point);
3755 else
3757 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3758 == stmt);
3759 gimple_assign_set_rhs1 (stmt, oe1->op);
3760 gimple_assign_set_rhs2 (stmt, oe2->op);
3761 update_stmt (stmt);
3764 if (rhs1 != oe1->op && rhs1 != oe2->op)
3765 remove_visited_stmt_chain (rhs1);
3767 if (dump_file && (dump_flags & TDF_DETAILS))
3769 fprintf (dump_file, " into ");
3770 print_gimple_stmt (dump_file, stmt, 0, 0);
3773 return lhs;
3776 /* If we hit here, we should have 3 or more ops left. */
3777 gcc_assert (opindex + 2 < ops.length ());
3779 /* Rewrite the next operator. */
3780 oe = ops[opindex];
3782 /* Recurse on the LHS of the binary operator, which is guaranteed to
3783 be the non-leaf side. */
3784 tree new_rhs1
3785 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3786 changed || oe->op != rhs2);
3788 if (oe->op != rhs2 || new_rhs1 != rhs1)
3790 if (dump_file && (dump_flags & TDF_DETAILS))
3792 fprintf (dump_file, "Transforming ");
3793 print_gimple_stmt (dump_file, stmt, 0, 0);
3796 /* If changed is false, this is either opindex == 0
3797 or all outer rhs2's were equal to corresponding oe->op,
3798 and powi_result is NULL.
3799 That means lhs is equivalent before and after reassociation.
3800 Otherwise ensure the old lhs SSA_NAME is not reused and
3801 create a new stmt as well, so that any debug stmts will be
3802 properly adjusted. */
3803 if (changed)
3805 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3806 unsigned int uid = gimple_uid (stmt);
3807 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3809 lhs = make_ssa_name (TREE_TYPE (lhs));
3810 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3811 new_rhs1, oe->op);
3812 gimple_set_uid (stmt, uid);
3813 gimple_set_visited (stmt, true);
3814 if (insert_point == gsi_stmt (gsi))
3815 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3816 else
3817 insert_stmt_after (stmt, insert_point);
3819 else
3821 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3822 == stmt);
3823 gimple_assign_set_rhs1 (stmt, new_rhs1);
3824 gimple_assign_set_rhs2 (stmt, oe->op);
3825 update_stmt (stmt);
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, " into ");
3831 print_gimple_stmt (dump_file, stmt, 0, 0);
3834 return lhs;
3837 /* Find out how many cycles we need to compute a statement chain.
3838 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3839 maximum number of independent statements we may execute per cycle. */
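/* For instance, with ops_num == 16 and cpu_width == 4 this computes
   res = 16 / 8 = 2 cycles for the first phase, leaving
   rest = 16 - 2 * 4 = 8 operands, which take exact_log2 (8) = 3 more
   halving cycles, i.e. 5 cycles in total.  */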
3841 static int
3842 get_required_cycles (int ops_num, int cpu_width)
3844 int res;
3845 int elog;
3846 unsigned int rest;
3848 /* While we have more than 2 * cpu_width operands
3849 we may reduce the number of operands by cpu_width
3850 per cycle. */
3851 res = ops_num / (2 * cpu_width);
3853 /* The remaining operand count may be halved each cycle
3854 until we have only one operand. */
3855 rest = (unsigned)(ops_num - res * cpu_width);
3856 elog = exact_log2 (rest);
3857 if (elog >= 0)
3858 res += elog;
3859 else
3860 res += floor_log2 (rest) + 1;
3862 return res;
3865 /* Returns an optimal number of registers to use for computation of
3866 given statements. */
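/* For instance, with ops_num == 6 and a target-reported width of 4,
   get_required_cycles (6, 4) is 3; the binary search below then finds
   that width 2 still needs only get_required_cycles (6, 2) == 3 cycles,
   so 2 is returned and fewer temporaries are kept live.  */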
3868 static int
3869 get_reassociation_width (int ops_num, enum tree_code opc,
3870 machine_mode mode)
3872 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3873 int width;
3874 int width_min;
3875 int cycles_best;
3877 if (param_width > 0)
3878 width = param_width;
3879 else
3880 width = targetm.sched.reassociation_width (opc, mode);
3882 if (width == 1)
3883 return width;
3885 /* Get the minimal time required for sequence computation. */
3886 cycles_best = get_required_cycles (ops_num, width);
3888 /* Check if we may use less width and still compute sequence for
3889 the same time. It will allow us to reduce registers usage.
3890 get_required_cycles is monotonically increasing with lower width
3891 so we can perform a binary search for the minimal width that still
3892 results in the optimal cycle count. */
3893 width_min = 1;
3894 while (width > width_min)
3896 int width_mid = (width + width_min) / 2;
3898 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3899 width = width_mid;
3900 else if (width_min < width_mid)
3901 width_min = width_mid;
3902 else
3903 break;
3906 return width;
3909 /* Recursively rewrite our linearized statements so that the operators
3910 match those in OPS[OPINDEX], putting the computation in rank
3911 order and trying to allow operations to be executed in
3912 parallel. */
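/* E.g. for a linearized chain computing a + b + c + d with width 2,
   instead of the serial form t1 = a + b; t2 = t1 + c; t3 = t2 + d;
   this emits roughly t1 = a + b; t2 = c + d; t3 = t1 + t2; so that the
   first two additions can issue in the same cycle.  */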
3914 static void
3915 rewrite_expr_tree_parallel (gassign *stmt, int width,
3916 vec<operand_entry *> ops)
3918 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3919 int op_num = ops.length ();
3920 int stmt_num = op_num - 1;
3921 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3922 int op_index = op_num - 1;
3923 int stmt_index = 0;
3924 int ready_stmts_end = 0;
3925 int i = 0;
3926 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3928 /* We start expression rewriting from the top statements.
3929 So, in this loop we create a full list of statements
3930 we will work with. */
3931 stmts[stmt_num - 1] = stmt;
3932 for (i = stmt_num - 2; i >= 0; i--)
3933 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3935 for (i = 0; i < stmt_num; i++)
3937 tree op1, op2;
3939 /* Determine whether we should use results of
3940 already handled statements or not. */
3941 if (ready_stmts_end == 0
3942 && (i - stmt_index >= width || op_index < 1))
3943 ready_stmts_end = i;
3945 /* Now we choose operands for the next statement. A nonzero
3946 value in ready_stmts_end means that we should use
3947 the results of already generated statements as new operands. */
3948 if (ready_stmts_end > 0)
3950 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3951 if (ready_stmts_end > stmt_index)
3952 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3953 else if (op_index >= 0)
3954 op2 = ops[op_index--]->op;
3955 else
3957 gcc_assert (stmt_index < i);
3958 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3961 if (stmt_index >= ready_stmts_end)
3962 ready_stmts_end = 0;
3964 else
3966 if (op_index > 1)
3967 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3968 op2 = ops[op_index--]->op;
3969 op1 = ops[op_index--]->op;
3972 /* If we emit the last statement then we should put
3973 operands into the last statement. It will also
3974 break the loop. */
3975 if (op_index < 0 && stmt_index == i)
3976 i = stmt_num - 1;
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3980 fprintf (dump_file, "Transforming ");
3981 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3984 /* We keep original statement only for the last one. All
3985 others are recreated. */
3986 if (i == stmt_num - 1)
3988 gimple_assign_set_rhs1 (stmts[i], op1);
3989 gimple_assign_set_rhs2 (stmts[i], op2);
3990 update_stmt (stmts[i]);
3992 else
3993 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3995 if (dump_file && (dump_flags & TDF_DETAILS))
3997 fprintf (dump_file, " into ");
3998 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4002 remove_visited_stmt_chain (last_rhs1);
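/* Sketch of the effect (assuming width 2 and five operands a..e, listed
   lowest rank first, and ignoring any reordering done by
   swap_ops_for_binary_stmt): the four-statement chain is typically
   rewritten as
     t1 = d + e;
     t2 = b + c;
     t3 = t1 + t2;
     lhs = t3 + a;
   so the first two additions no longer depend on each other and can
   issue in the same cycle.  */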
4005 /* Transform STMT, which is really (A + B) + (C + D), into the left
4006 linear form, ((A+B)+C)+D.
4007 Recurse on D if necessary. */
4009 static void
4010 linearize_expr (gimple *stmt)
4012 gimple_stmt_iterator gsi;
4013 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4014 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4015 gimple *oldbinrhs = binrhs;
4016 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4017 gimple *newbinrhs = NULL;
4018 struct loop *loop = loop_containing_stmt (stmt);
4019 tree lhs = gimple_assign_lhs (stmt);
4021 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4022 && is_reassociable_op (binrhs, rhscode, loop));
4024 gsi = gsi_for_stmt (stmt);
4026 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4027 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4028 gimple_assign_rhs_code (binrhs),
4029 gimple_assign_lhs (binlhs),
4030 gimple_assign_rhs2 (binrhs));
4031 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4032 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4033 gimple_set_uid (binrhs, gimple_uid (stmt));
4035 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4036 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4038 if (dump_file && (dump_flags & TDF_DETAILS))
4040 fprintf (dump_file, "Linearized: ");
4041 print_gimple_stmt (dump_file, stmt, 0, 0);
4044 reassociate_stats.linearized++;
4045 update_stmt (stmt);
4047 gsi = gsi_for_stmt (oldbinrhs);
4048 reassoc_remove_stmt (&gsi);
4049 release_defs (oldbinrhs);
4051 gimple_set_visited (stmt, true);
4052 gimple_set_visited (binlhs, true);
4053 gimple_set_visited (binrhs, true);
4055 /* Tail recurse on the new rhs if it still needs reassociation. */
4056 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4057 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4058 want to change the algorithm while converting to tuples. */
4059 linearize_expr (stmt);
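/* Example of a single linearization step (a sketch with invented names):
   given
     x1 = a + b;  x2 = c + d;  y = x1 + x2;
   the code above emits t = x1 + d and rewrites the root to y = t + c,
   i.e. the tree becomes the left-linear ((a + b) + d) + c; the tail
   recursion then handles c if its definition is also reassociable.  */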
4062 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4063 it. Otherwise, return NULL. */
4065 static gimple *
4066 get_single_immediate_use (tree lhs)
4068 use_operand_p immuse;
4069 gimple *immusestmt;
4071 if (TREE_CODE (lhs) == SSA_NAME
4072 && single_imm_use (lhs, &immuse, &immusestmt)
4073 && is_gimple_assign (immusestmt))
4074 return immusestmt;
4076 return NULL;
4079 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4080 representing the negated value. Insertions of any necessary
4081 instructions go before GSI.
4082 This function is recursive in that, if you hand it "a_5" as the
4083 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4084 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4086 static tree
4087 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4089 gimple *negatedefstmt = NULL;
4090 tree resultofnegate;
4091 gimple_stmt_iterator gsi;
4092 unsigned int uid;
4094 /* If we are trying to negate a name, defined by an add, negate the
4095 add operands instead. */
4096 if (TREE_CODE (tonegate) == SSA_NAME)
4097 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4098 if (TREE_CODE (tonegate) == SSA_NAME
4099 && is_gimple_assign (negatedefstmt)
4100 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4101 && has_single_use (gimple_assign_lhs (negatedefstmt))
4102 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4104 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4105 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4106 tree lhs = gimple_assign_lhs (negatedefstmt);
4107 gimple *g;
4109 gsi = gsi_for_stmt (negatedefstmt);
4110 rhs1 = negate_value (rhs1, &gsi);
4112 gsi = gsi_for_stmt (negatedefstmt);
4113 rhs2 = negate_value (rhs2, &gsi);
4115 gsi = gsi_for_stmt (negatedefstmt);
4116 lhs = make_ssa_name (TREE_TYPE (lhs));
4117 gimple_set_visited (negatedefstmt, true);
4118 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4119 gimple_set_uid (g, gimple_uid (negatedefstmt));
4120 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4121 return lhs;
4124 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4125 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4126 NULL_TREE, true, GSI_SAME_STMT);
4127 gsi = *gsip;
4128 uid = gimple_uid (gsi_stmt (gsi));
4129 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4131 gimple *stmt = gsi_stmt (gsi);
4132 if (gimple_uid (stmt) != 0)
4133 break;
4134 gimple_set_uid (stmt, uid);
4136 return resultofnegate;
4139 /* Return true if we should break up the subtract in STMT into an add
4140 with negate. This is true when the subtract operands are really
4141 adds, or the subtract itself is used in an add expression. In
4142 either case, breaking up the subtract into an add with negate
4143 exposes the adds to reassociation. */
4145 static bool
4146 should_break_up_subtract (gimple *stmt)
4148 tree lhs = gimple_assign_lhs (stmt);
4149 tree binlhs = gimple_assign_rhs1 (stmt);
4150 tree binrhs = gimple_assign_rhs2 (stmt);
4151 gimple *immusestmt;
4152 struct loop *loop = loop_containing_stmt (stmt);
4154 if (TREE_CODE (binlhs) == SSA_NAME
4155 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4156 return true;
4158 if (TREE_CODE (binrhs) == SSA_NAME
4159 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4160 return true;
4162 if (TREE_CODE (lhs) == SSA_NAME
4163 && (immusestmt = get_single_immediate_use (lhs))
4164 && is_gimple_assign (immusestmt)
4165 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4166 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4167 return true;
4168 return false;
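/* For instance, for
     t = a - b;  u = t + c;
   the subtract feeds an addition, so it is broken up into t = a + -b,
   exposing the whole chain to reassociation; a subtract whose operands
   are not adds and whose result is only stored or compared is left
   alone.  */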
4171 /* Transform STMT from A - B into A + -B. */
4173 static void
4174 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4176 tree rhs1 = gimple_assign_rhs1 (stmt);
4177 tree rhs2 = gimple_assign_rhs2 (stmt);
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Breaking up subtract ");
4182 print_gimple_stmt (dump_file, stmt, 0, 0);
4185 rhs2 = negate_value (rhs2, gsip);
4186 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4187 update_stmt (stmt);
4190 /* Determine whether STMT is a builtin call that raises an SSA name
4191 to an integer power and has only one use. If so, and this is early
4192 reassociation and unsafe math optimizations are permitted, place
4193 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4194 If any of these conditions does not hold, return FALSE. */
4196 static bool
4197 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
4199 tree arg1;
4200 REAL_VALUE_TYPE c, cint;
4202 if (!reassoc_insert_powi_p
4203 || !flag_unsafe_math_optimizations
4204 || !is_gimple_call (stmt)
4205 || !has_single_use (gimple_call_lhs (stmt)))
4206 return false;
4208 switch (gimple_call_combined_fn (stmt))
4210 CASE_CFN_POW:
4211 if (flag_errno_math)
4212 return false;
4214 *base = gimple_call_arg (stmt, 0);
4215 arg1 = gimple_call_arg (stmt, 1);
4217 if (TREE_CODE (arg1) != REAL_CST)
4218 return false;
4220 c = TREE_REAL_CST (arg1);
4222 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4223 return false;
4225 *exponent = real_to_integer (&c);
4226 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4227 if (!real_identical (&c, &cint))
4228 return false;
4230 break;
4232 CASE_CFN_POWI:
4233 *base = gimple_call_arg (stmt, 0);
4234 arg1 = gimple_call_arg (stmt, 1);
4236 if (!tree_fits_shwi_p (arg1))
4237 return false;
4239 *exponent = tree_to_shwi (arg1);
4240 break;
4242 default:
4243 return false;
4246 /* Expanding negative exponents is generally unproductive, so we don't
4247 complicate matters with those. Exponents of zero and one should
4248 have been handled by expression folding. */
4249 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4250 return false;
4252 return true;
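/* For example (assuming early reassociation and
   -funsafe-math-optimizations): a single-use call
   t = __builtin_powi (x, 3) yields *BASE == x and *EXPONENT == 3;
   pow (x, 2.5) is rejected because 2.5 is not exactly an integer, and
   pow (x, 1.0) is rejected here because exponents below 2 are not worth
   expanding.  */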
4255 /* Recursively linearize a binary expression that is the RHS of STMT.
4256 Place the operands of the expression tree in the vector named OPS. */
4258 static void
4259 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4260 bool is_associative, bool set_visited)
4262 tree binlhs = gimple_assign_rhs1 (stmt);
4263 tree binrhs = gimple_assign_rhs2 (stmt);
4264 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4265 bool binlhsisreassoc = false;
4266 bool binrhsisreassoc = false;
4267 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4268 struct loop *loop = loop_containing_stmt (stmt);
4269 tree base = NULL_TREE;
4270 HOST_WIDE_INT exponent = 0;
4272 if (set_visited)
4273 gimple_set_visited (stmt, true);
4275 if (TREE_CODE (binlhs) == SSA_NAME)
4277 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4278 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4279 && !stmt_could_throw_p (binlhsdef));
4282 if (TREE_CODE (binrhs) == SSA_NAME)
4284 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4285 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4286 && !stmt_could_throw_p (binrhsdef));
4289 /* If the LHS is not reassociable, but the RHS is, we need to swap
4290 them. If neither is reassociable, there is nothing we can do, so
4291 just put them in the ops vector. If the LHS is reassociable,
4292 linearize it. If both are reassociable, then linearize the RHS
4293 and the LHS. */
4295 if (!binlhsisreassoc)
4297 /* If this is not an associative operation, like division, give up. */
4298 if (!is_associative)
4300 add_to_ops_vec (ops, binrhs);
4301 return;
4304 if (!binrhsisreassoc)
4306 if (rhscode == MULT_EXPR
4307 && TREE_CODE (binrhs) == SSA_NAME
4308 && acceptable_pow_call (binrhsdef, &base, &exponent))
4310 add_repeat_to_ops_vec (ops, base, exponent);
4311 gimple_set_visited (binrhsdef, true);
4313 else
4314 add_to_ops_vec (ops, binrhs);
4316 if (rhscode == MULT_EXPR
4317 && TREE_CODE (binlhs) == SSA_NAME
4318 && acceptable_pow_call (binlhsdef, &base, &exponent))
4320 add_repeat_to_ops_vec (ops, base, exponent);
4321 gimple_set_visited (binlhsdef, true);
4323 else
4324 add_to_ops_vec (ops, binlhs);
4326 return;
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, "swapping operands of ");
4332 print_gimple_stmt (dump_file, stmt, 0, 0);
4335 swap_ssa_operands (stmt,
4336 gimple_assign_rhs1_ptr (stmt),
4337 gimple_assign_rhs2_ptr (stmt));
4338 update_stmt (stmt);
4340 if (dump_file && (dump_flags & TDF_DETAILS))
4342 fprintf (dump_file, " is now ");
4343 print_gimple_stmt (dump_file, stmt, 0, 0);
4346 /* We want to make it so the lhs is always the reassociative op,
4347 so swap. */
4348 std::swap (binlhs, binrhs);
4350 else if (binrhsisreassoc)
4352 linearize_expr (stmt);
4353 binlhs = gimple_assign_rhs1 (stmt);
4354 binrhs = gimple_assign_rhs2 (stmt);
4357 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4358 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4359 rhscode, loop));
4360 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4361 is_associative, set_visited);
4363 if (rhscode == MULT_EXPR
4364 && TREE_CODE (binrhs) == SSA_NAME
4365 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4367 add_repeat_to_ops_vec (ops, base, exponent);
4368 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4370 else
4371 add_to_ops_vec (ops, binrhs);
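/* Illustrative result (invented names): linearizing the chain
     t1 = a + b;  t2 = t1 + c;  x = t2 + d;
   starting from the statement for x leaves OPS holding the four leaf
   operands a, b, c and d (later sorted by rank), with t1 and t2 marked
   visited so they are not processed again.  */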
4374 /* Repropagate the negates back into subtracts, since no other pass
4375 currently does it. */
4377 static void
4378 repropagate_negates (void)
4380 unsigned int i = 0;
4381 tree negate;
4383 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4385 gimple *user = get_single_immediate_use (negate);
4387 if (!user || !is_gimple_assign (user))
4388 continue;
4390 /* The negate operand can be either operand of a PLUS_EXPR
4391 (it can be the LHS if the RHS is a constant for example).
4393 Force the negate operand to the RHS of the PLUS_EXPR, then
4394 transform the PLUS_EXPR into a MINUS_EXPR. */
4395 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4397 /* If the negated operand appears on the LHS of the
4398 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4399 to force the negated operand to the RHS of the PLUS_EXPR. */
4400 if (gimple_assign_rhs1 (user) == negate)
4402 swap_ssa_operands (user,
4403 gimple_assign_rhs1_ptr (user),
4404 gimple_assign_rhs2_ptr (user));
4407 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4408 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4409 if (gimple_assign_rhs2 (user) == negate)
4411 tree rhs1 = gimple_assign_rhs1 (user);
4412 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4413 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4414 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4415 update_stmt (user);
4418 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4420 if (gimple_assign_rhs1 (user) == negate)
4422 /* We have
4423 x = -a
4424 y = x - b
4425 which we transform into
4426 x = a + b
4427 y = -x .
4428 This pushes down the negate which we possibly can merge
4429 into some other operation, hence insert it into the
4430 plus_negates vector. */
4431 gimple *feed = SSA_NAME_DEF_STMT (negate);
4432 tree a = gimple_assign_rhs1 (feed);
4433 tree b = gimple_assign_rhs2 (user);
4434 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4435 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4436 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4437 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4438 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4439 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4440 user = gsi_stmt (gsi2);
4441 update_stmt (user);
4442 reassoc_remove_stmt (&gsi);
4443 release_defs (feed);
4444 plus_negates.safe_push (gimple_assign_lhs (user));
4446 else
4448 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4449 rid of one operation. */
4450 gimple *feed = SSA_NAME_DEF_STMT (negate);
4451 tree a = gimple_assign_rhs1 (feed);
4452 tree rhs1 = gimple_assign_rhs1 (user);
4453 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4454 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4455 update_stmt (gsi_stmt (gsi));
4461 /* Returns true if OP is of a type for which we can do reassociation.
4462 That is, for integral or non-saturating fixed-point types, and for
4463 floating-point types when -fassociative-math is enabled. */
4465 static bool
4466 can_reassociate_p (tree op)
4468 tree type = TREE_TYPE (op);
4469 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4470 || NON_SAT_FIXED_POINT_TYPE_P (type)
4471 || (flag_associative_math && FLOAT_TYPE_P (type)))
4472 return true;
4473 return false;
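/* For example: unsigned arithmetic and signed arithmetic under -fwrapv
   qualify (overflow wraps), plain signed arithmetic with undefined
   overflow does not, and float/double only qualify when
   -fassociative-math is in effect.  */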
4476 /* Break up subtract operations in block BB.
4478 We do this top down because we don't know whether the subtract is
4479 part of a possible chain of reassociation except at the top.
4481 IE given
4482 d = f + g
4483 c = a + e
4484 b = c - d
4485 q = b - r
4486 k = t - q
4488 we want to break up k = t - q, but we won't until we've transformed q
4489 = b - r, which won't be broken up until we transform b = c - d.
4491 En passant, clear the GIMPLE visited flag on every statement
4492 and set UIDs within each basic block. */
4494 static void
4495 break_up_subtract_bb (basic_block bb)
4497 gimple_stmt_iterator gsi;
4498 basic_block son;
4499 unsigned int uid = 1;
4501 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4503 gimple *stmt = gsi_stmt (gsi);
4504 gimple_set_visited (stmt, false);
4505 gimple_set_uid (stmt, uid++);
4507 if (!is_gimple_assign (stmt)
4508 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4509 continue;
4511 /* Look for simple gimple subtract operations. */
4512 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4514 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4515 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4516 continue;
4518 /* Check for a subtract used only in an addition. If this
4519 is the case, transform it into add of a negate for better
4520 reassociation. IE transform C = A-B into C = A + -B if C
4521 is only used in an addition. */
4522 if (should_break_up_subtract (stmt))
4523 break_up_subtract (stmt, &gsi);
4525 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4526 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4527 plus_negates.safe_push (gimple_assign_lhs (stmt));
4529 for (son = first_dom_son (CDI_DOMINATORS, bb);
4530 son;
4531 son = next_dom_son (CDI_DOMINATORS, son))
4532 break_up_subtract_bb (son);
4535 /* Used for repeated factor analysis. */
4536 struct repeat_factor
4538 /* An SSA name that occurs in a multiply chain. */
4539 tree factor;
4541 /* Cached rank of the factor. */
4542 unsigned rank;
4544 /* Number of occurrences of the factor in the chain. */
4545 HOST_WIDE_INT count;
4547 /* An SSA name representing the product of this factor and
4548 all factors appearing later in the repeated factor vector. */
4549 tree repr;
4553 static vec<repeat_factor> repeat_factor_vec;
4555 /* Used for sorting the repeat factor vector. Sort primarily by
4556 ascending occurrence count, secondarily by descending rank. */
4558 static int
4559 compare_repeat_factors (const void *x1, const void *x2)
4561 const repeat_factor *rf1 = (const repeat_factor *) x1;
4562 const repeat_factor *rf2 = (const repeat_factor *) x2;
4564 if (rf1->count != rf2->count)
4565 return rf1->count - rf2->count;
4567 return rf2->rank - rf1->rank;
4570 /* Look for repeated operands in OPS in the multiply tree rooted at
4571 STMT. Replace them with an optimal sequence of multiplies and powi
4572 builtin calls, and remove the used operands from OPS. Return an
4573 SSA name representing the value of the replacement sequence. */
4575 static tree
4576 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4578 unsigned i, j, vec_len;
4579 int ii;
4580 operand_entry *oe;
4581 repeat_factor *rf1, *rf2;
4582 repeat_factor rfnew;
4583 tree result = NULL_TREE;
4584 tree target_ssa, iter_result;
4585 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4586 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4587 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4588 gimple *mul_stmt, *pow_stmt;
4590 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4591 target. */
4592 if (!powi_fndecl)
4593 return NULL_TREE;
4595 /* Allocate the repeated factor vector. */
4596 repeat_factor_vec.create (10);
4598 /* Scan the OPS vector for all SSA names in the product and build
4599 up a vector of occurrence counts for each factor. */
4600 FOR_EACH_VEC_ELT (*ops, i, oe)
4602 if (TREE_CODE (oe->op) == SSA_NAME)
4604 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4606 if (rf1->factor == oe->op)
4608 rf1->count += oe->count;
4609 break;
4613 if (j >= repeat_factor_vec.length ())
4615 rfnew.factor = oe->op;
4616 rfnew.rank = oe->rank;
4617 rfnew.count = oe->count;
4618 rfnew.repr = NULL_TREE;
4619 repeat_factor_vec.safe_push (rfnew);
4624 /* Sort the repeated factor vector by (a) increasing occurrence count,
4625 and (b) decreasing rank. */
4626 repeat_factor_vec.qsort (compare_repeat_factors);
4628 /* It is generally best to combine as many base factors as possible
4629 into a product before applying __builtin_powi to the result.
4630 However, the sort order chosen for the repeated factor vector
4631 allows us to cache partial results for the product of the base
4632 factors for subsequent use. When we already have a cached partial
4633 result from a previous iteration, it is best to make use of it
4634 before looking for another __builtin_powi opportunity.
4636 As an example, consider x * x * y * y * y * z * z * z * z.
4637 We want to first compose the product x * y * z, raise it to the
4638 second power, then multiply this by y * z, and finally multiply
4639 by z. This can be done in 5 multiplies provided we cache y * z
4640 for use in both expressions:
4642 t1 = y * z
4643 t2 = t1 * x
4644 t3 = t2 * t2
4645 t4 = t1 * t3
4646 result = t4 * z
4648 If we instead ignored the cached y * z and first multiplied by
4649 the __builtin_powi opportunity z * z, we would get the inferior:
4651 t1 = y * z
4652 t2 = t1 * x
4653 t3 = t2 * t2
4654 t4 = z * z
4655 t5 = t3 * t4
4656 result = t5 * y */
4658 vec_len = repeat_factor_vec.length ();
4660 /* Repeatedly look for opportunities to create a builtin_powi call. */
4661 while (true)
4663 HOST_WIDE_INT power;
4665 /* First look for the largest cached product of factors from
4666 preceding iterations. If found, create a builtin_powi for
4667 it if the minimum occurrence count for its factors is at
4668 least 2, or just use this cached product as our next
4669 multiplicand if the minimum occurrence count is 1. */
4670 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4672 if (rf1->repr && rf1->count > 0)
4673 break;
4676 if (j < vec_len)
4678 power = rf1->count;
4680 if (power == 1)
4682 iter_result = rf1->repr;
4684 if (dump_file && (dump_flags & TDF_DETAILS))
4686 unsigned elt;
4687 repeat_factor *rf;
4688 fputs ("Multiplying by cached product ", dump_file);
4689 for (elt = j; elt < vec_len; elt++)
4691 rf = &repeat_factor_vec[elt];
4692 print_generic_expr (dump_file, rf->factor, 0);
4693 if (elt < vec_len - 1)
4694 fputs (" * ", dump_file);
4696 fputs ("\n", dump_file);
4699 else
4701 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4702 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4703 build_int_cst (integer_type_node,
4704 power));
4705 gimple_call_set_lhs (pow_stmt, iter_result);
4706 gimple_set_location (pow_stmt, gimple_location (stmt));
4707 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4708 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4710 if (dump_file && (dump_flags & TDF_DETAILS))
4712 unsigned elt;
4713 repeat_factor *rf;
4714 fputs ("Building __builtin_pow call for cached product (",
4715 dump_file);
4716 for (elt = j; elt < vec_len; elt++)
4718 rf = &repeat_factor_vec[elt];
4719 print_generic_expr (dump_file, rf->factor, 0);
4720 if (elt < vec_len - 1)
4721 fputs (" * ", dump_file);
4723 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4724 power);
4728 else
4730 /* Otherwise, find the first factor in the repeated factor
4731 vector whose occurrence count is at least 2. If no such
4732 factor exists, there are no builtin_powi opportunities
4733 remaining. */
4734 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4736 if (rf1->count >= 2)
4737 break;
4740 if (j >= vec_len)
4741 break;
4743 power = rf1->count;
4745 if (dump_file && (dump_flags & TDF_DETAILS))
4747 unsigned elt;
4748 repeat_factor *rf;
4749 fputs ("Building __builtin_pow call for (", dump_file);
4750 for (elt = j; elt < vec_len; elt++)
4752 rf = &repeat_factor_vec[elt];
4753 print_generic_expr (dump_file, rf->factor, 0);
4754 if (elt < vec_len - 1)
4755 fputs (" * ", dump_file);
4757 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4760 reassociate_stats.pows_created++;
4762 /* Visit each element of the vector in reverse order (so that
4763 high-occurrence elements are visited first, and within the
4764 same occurrence count, lower-ranked elements are visited
4765 first). Form a linear product of all elements in this order
4766 whose occurrence count is at least that of element J.
4767 Record the SSA name representing the product of each element
4768 with all subsequent elements in the vector. */
4769 if (j == vec_len - 1)
4770 rf1->repr = rf1->factor;
4771 else
4773 for (ii = vec_len - 2; ii >= (int)j; ii--)
4775 tree op1, op2;
4777 rf1 = &repeat_factor_vec[ii];
4778 rf2 = &repeat_factor_vec[ii + 1];
4780 /* Init the last factor's representative to be itself. */
4781 if (!rf2->repr)
4782 rf2->repr = rf2->factor;
4784 op1 = rf1->factor;
4785 op2 = rf2->repr;
4787 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4788 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4789 op1, op2);
4790 gimple_set_location (mul_stmt, gimple_location (stmt));
4791 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4792 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4793 rf1->repr = target_ssa;
4795 /* Don't reprocess the multiply we just introduced. */
4796 gimple_set_visited (mul_stmt, true);
4800 /* Form a call to __builtin_powi for the maximum product
4801 just formed, raised to the power obtained earlier. */
4802 rf1 = &repeat_factor_vec[j];
4803 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4804 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4805 build_int_cst (integer_type_node,
4806 power));
4807 gimple_call_set_lhs (pow_stmt, iter_result);
4808 gimple_set_location (pow_stmt, gimple_location (stmt));
4809 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4810 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4813 /* If we previously formed at least one other builtin_powi call,
4814 form the product of this one and those others. */
4815 if (result)
4817 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4818 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4819 result, iter_result);
4820 gimple_set_location (mul_stmt, gimple_location (stmt));
4821 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4822 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4823 gimple_set_visited (mul_stmt, true);
4824 result = new_result;
4826 else
4827 result = iter_result;
4829 /* Decrement the occurrence count of each element in the product
4830 by the count found above, and remove this many copies of each
4831 factor from OPS. */
4832 for (i = j; i < vec_len; i++)
4834 unsigned k = power;
4835 unsigned n;
4837 rf1 = &repeat_factor_vec[i];
4838 rf1->count -= power;
4840 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4842 if (oe->op == rf1->factor)
4844 if (oe->count <= k)
4846 ops->ordered_remove (n);
4847 k -= oe->count;
4849 if (k == 0)
4850 break;
4852 else
4854 oe->count -= k;
4855 break;
4862 /* At this point all elements in the repeated factor vector have a
4863 remaining occurrence count of 0 or 1, and those with a count of 1
4864 don't have cached representatives. Re-sort the ops vector and
4865 clean up. */
4866 ops->qsort (sort_by_operand_rank);
4867 repeat_factor_vec.release ();
4869 /* Return the final product computed herein. Note that there may
4870 still be some elements with single occurrence count left in OPS;
4871 those will be handled by the normal reassociation logic. */
4872 return result;
4875 /* Attempt to optimize
4876 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4877 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
4879 static void
4880 attempt_builtin_copysign (vec<operand_entry *> *ops)
4882 operand_entry *oe;
4883 unsigned int i;
4884 unsigned int length = ops->length ();
4885 tree cst = ops->last ()->op;
4887 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4888 return;
4890 FOR_EACH_VEC_ELT (*ops, i, oe)
4892 if (TREE_CODE (oe->op) == SSA_NAME
4893 && has_single_use (oe->op))
4895 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4896 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4898 tree arg0, arg1;
4899 switch (gimple_call_combined_fn (old_call))
4901 CASE_CFN_COPYSIGN:
4902 arg0 = gimple_call_arg (old_call, 0);
4903 arg1 = gimple_call_arg (old_call, 1);
4904 /* The first argument of copysign must be a constant,
4905 otherwise there's nothing to do. */
4906 if (TREE_CODE (arg0) == REAL_CST)
4908 tree type = TREE_TYPE (arg0);
4909 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
4910 /* If we couldn't fold to a single constant, skip it.
4911 That happens e.g. for inexact multiplication when
4912 -frounding-math. */
4913 if (mul == NULL_TREE)
4914 break;
4915 /* Instead of adjusting OLD_CALL, let's build a new
4916 call to not leak the LHS and prevent keeping bogus
4917 debug statements. DCE will clean up the old call. */
4918 gcall *new_call;
4919 if (gimple_call_internal_p (old_call))
4920 new_call = gimple_build_call_internal
4921 (IFN_COPYSIGN, 2, mul, arg1);
4922 else
4923 new_call = gimple_build_call
4924 (gimple_call_fndecl (old_call), 2, mul, arg1);
4925 tree lhs = make_ssa_name (type);
4926 gimple_call_set_lhs (new_call, lhs);
4927 gimple_set_location (new_call,
4928 gimple_location (old_call));
4929 insert_stmt_after (new_call, old_call);
4930 /* We've used the constant, get rid of it. */
4931 ops->pop ();
4932 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4933 /* Handle the CST1 < 0 case by negating the result. */
4934 if (cst1_neg)
4936 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4937 gimple *negate_stmt
4938 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4939 insert_stmt_after (negate_stmt, new_call);
4940 oe->op = negrhs;
4942 else
4943 oe->op = lhs;
4944 if (dump_file && (dump_flags & TDF_DETAILS))
4946 fprintf (dump_file, "Optimizing copysign: ");
4947 print_generic_expr (dump_file, cst, 0);
4948 fprintf (dump_file, " * COPYSIGN (");
4949 print_generic_expr (dump_file, arg0, 0);
4950 fprintf (dump_file, ", ");
4951 print_generic_expr (dump_file, arg1, 0);
4952 fprintf (dump_file, ") into %sCOPYSIGN (",
4953 cst1_neg ? "-" : "");
4954 print_generic_expr (dump_file, mul, 0);
4955 fprintf (dump_file, ", ");
4956 print_generic_expr (dump_file, arg1, 0);
4957 fprintf (dump_file, ")\n");
4959 return;
4961 break;
4962 default:
4963 break;
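/* Example of the transform above (illustrative only): in a product whose
   constant operand is 2.0 and which contains t = copysign (3.0, y), the
   call is rebuilt as t2 = copysign (6.0, y) and the 2.0 is dropped from
   OPS; had the constant been negative, the rebuilt call would be negated
   instead.  */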
4970 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4972 static void
4973 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4975 tree rhs1;
4977 if (dump_file && (dump_flags & TDF_DETAILS))
4979 fprintf (dump_file, "Transforming ");
4980 print_gimple_stmt (dump_file, stmt, 0, 0);
4983 rhs1 = gimple_assign_rhs1 (stmt);
4984 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4985 update_stmt (stmt);
4986 remove_visited_stmt_chain (rhs1);
4988 if (dump_file && (dump_flags & TDF_DETAILS))
4990 fprintf (dump_file, " into ");
4991 print_gimple_stmt (dump_file, stmt, 0, 0);
4995 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4997 static void
4998 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4999 tree rhs1, tree rhs2)
5001 if (dump_file && (dump_flags & TDF_DETAILS))
5003 fprintf (dump_file, "Transforming ");
5004 print_gimple_stmt (dump_file, stmt, 0, 0);
5007 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5008 update_stmt (gsi_stmt (*gsi));
5009 remove_visited_stmt_chain (rhs1);
5011 if (dump_file && (dump_flags & TDF_DETAILS))
5013 fprintf (dump_file, " into ");
5014 print_gimple_stmt (dump_file, stmt, 0, 0);
5018 /* Reassociate expressions in basic block BB and its children in the
5019 post-dominator tree.
5021 Bubble up return status from maybe_optimize_range_tests. */
5023 static bool
5024 reassociate_bb (basic_block bb)
5026 gimple_stmt_iterator gsi;
5027 basic_block son;
5028 gimple *stmt = last_stmt (bb);
5029 bool cfg_cleanup_needed = false;
5031 if (stmt && !gimple_visited_p (stmt))
5032 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5034 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5036 stmt = gsi_stmt (gsi);
5038 if (is_gimple_assign (stmt)
5039 && !stmt_could_throw_p (stmt))
5041 tree lhs, rhs1, rhs2;
5042 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5044 /* If this is not a gimple binary expression, there is
5045 nothing for us to do with it. */
5046 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5047 continue;
5049 /* If this was part of an already processed statement,
5050 we don't need to touch it again. */
5051 if (gimple_visited_p (stmt))
5053 /* This statement might have become dead because of previous
5054 reassociations. */
5055 if (has_zero_uses (gimple_get_lhs (stmt)))
5057 reassoc_remove_stmt (&gsi);
5058 release_defs (stmt);
5059 /* We might have removed the last stmt above, which
5060 leaves the iterator at the end of the sequence.
5061 Reset it to the last stmt of the block in that case;
5062 if that is still the end of the sequence (we removed
5063 the last statement of the block), there is nothing
5064 left to process, so bail out. */
5065 if (gsi_end_p (gsi))
5067 gsi = gsi_last_bb (bb);
5068 if (gsi_end_p (gsi))
5069 break;
5072 continue;
5075 lhs = gimple_assign_lhs (stmt);
5076 rhs1 = gimple_assign_rhs1 (stmt);
5077 rhs2 = gimple_assign_rhs2 (stmt);
5079 /* For non-bit or min/max operations we can't associate
5080 all types. Verify that here. */
5081 if (rhs_code != BIT_IOR_EXPR
5082 && rhs_code != BIT_AND_EXPR
5083 && rhs_code != BIT_XOR_EXPR
5084 && rhs_code != MIN_EXPR
5085 && rhs_code != MAX_EXPR
5086 && (!can_reassociate_p (lhs)
5087 || !can_reassociate_p (rhs1)
5088 || !can_reassociate_p (rhs2)))
5089 continue;
5091 if (associative_tree_code (rhs_code))
5093 auto_vec<operand_entry *> ops;
5094 tree powi_result = NULL_TREE;
5095 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5097 /* There may be no immediate uses left by the time we
5098 get here because we may have eliminated them all. */
5099 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5100 continue;
5102 gimple_set_visited (stmt, true);
5103 linearize_expr_tree (&ops, stmt, true, true);
5104 ops.qsort (sort_by_operand_rank);
5105 optimize_ops_list (rhs_code, &ops);
5106 if (undistribute_ops_list (rhs_code, &ops,
5107 loop_containing_stmt (stmt)))
5109 ops.qsort (sort_by_operand_rank);
5110 optimize_ops_list (rhs_code, &ops);
5113 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5115 if (is_vector)
5116 optimize_vec_cond_expr (rhs_code, &ops);
5117 else
5118 optimize_range_tests (rhs_code, &ops);
5121 if (rhs_code == MULT_EXPR && !is_vector)
5123 attempt_builtin_copysign (&ops);
5125 if (reassoc_insert_powi_p
5126 && flag_unsafe_math_optimizations)
5127 powi_result = attempt_builtin_powi (stmt, &ops);
5130 /* If the operand vector is now empty, all operands were
5131 consumed by the __builtin_powi optimization. */
5132 if (ops.length () == 0)
5133 transform_stmt_to_copy (&gsi, stmt, powi_result);
5134 else if (ops.length () == 1)
5136 tree last_op = ops.last ()->op;
5138 if (powi_result)
5139 transform_stmt_to_multiply (&gsi, stmt, last_op,
5140 powi_result);
5141 else
5142 transform_stmt_to_copy (&gsi, stmt, last_op);
5144 else
5146 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5147 int ops_num = ops.length ();
5148 int width = get_reassociation_width (ops_num, rhs_code, mode);
5149 tree new_lhs = lhs;
5151 if (dump_file && (dump_flags & TDF_DETAILS))
5152 fprintf (dump_file,
5153 "Width = %d was chosen for reassociation\n", width);
5155 if (width > 1
5156 && ops.length () > 3)
5157 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5158 width, ops);
5159 else
5161 /* When there are three operands left, we want
5162 to make sure the ones that get the double
5163 binary op are chosen wisely. */
5164 int len = ops.length ();
5165 if (len >= 3)
5166 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5168 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5169 powi_result != NULL);
5172 /* If we combined some repeated factors into a
5173 __builtin_powi call, multiply that result by the
5174 reassociated operands. */
5175 if (powi_result)
5177 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5178 tree type = TREE_TYPE (lhs);
5179 tree target_ssa = make_temp_ssa_name (type, NULL,
5180 "reassocpow");
5181 gimple_set_lhs (lhs_stmt, target_ssa);
5182 update_stmt (lhs_stmt);
5183 if (lhs != new_lhs)
5184 target_ssa = new_lhs;
5185 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5186 powi_result, target_ssa);
5187 gimple_set_location (mul_stmt, gimple_location (stmt));
5188 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5189 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5195 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5196 son;
5197 son = next_dom_son (CDI_POST_DOMINATORS, son))
5198 cfg_cleanup_needed |= reassociate_bb (son);
5200 return cfg_cleanup_needed;
5203 /* Add jumps around shifts for range tests turned into bit tests.
5204 For each SSA_NAME VAR we have code like:
5205 VAR = ...; // final stmt of range comparison
5206 // bit test here...;
5207 OTHERVAR = ...; // final stmt of the bit test sequence
5208 RES = VAR | OTHERVAR;
5209 Turn the above into:
5210 VAR = ...;
5211 if (VAR != 0)
5212 goto <l3>;
5213 else
5214 goto <l2>;
5215 <l2>:
5216 // bit test here...;
5217 OTHERVAR = ...;
5218 <l3>:
5219 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5221 static void
5222 branch_fixup (void)
5224 tree var;
5225 unsigned int i;
5227 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5229 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5230 gimple *use_stmt;
5231 use_operand_p use;
5232 bool ok = single_imm_use (var, &use, &use_stmt);
5233 gcc_assert (ok
5234 && is_gimple_assign (use_stmt)
5235 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5236 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5238 basic_block cond_bb = gimple_bb (def_stmt);
5239 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5240 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5242 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5243 gimple *g = gimple_build_cond (NE_EXPR, var,
5244 build_zero_cst (TREE_TYPE (var)),
5245 NULL_TREE, NULL_TREE);
5246 location_t loc = gimple_location (use_stmt);
5247 gimple_set_location (g, loc);
5248 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5250 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5251 etrue->probability = REG_BR_PROB_BASE / 2;
5252 etrue->count = cond_bb->count / 2;
5253 edge efalse = find_edge (cond_bb, then_bb);
5254 efalse->flags = EDGE_FALSE_VALUE;
5255 efalse->probability -= etrue->probability;
5256 efalse->count -= etrue->count;
5257 then_bb->count -= etrue->count;
5259 tree othervar = NULL_TREE;
5260 if (gimple_assign_rhs1 (use_stmt) == var)
5261 othervar = gimple_assign_rhs2 (use_stmt);
5262 else if (gimple_assign_rhs2 (use_stmt) == var)
5263 othervar = gimple_assign_rhs1 (use_stmt);
5264 else
5265 gcc_unreachable ();
5266 tree lhs = gimple_assign_lhs (use_stmt);
5267 gphi *phi = create_phi_node (lhs, merge_bb);
5268 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5269 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5270 gsi = gsi_for_stmt (use_stmt);
5271 gsi_remove (&gsi, true);
5273 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5274 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5276 reassoc_branch_fixups.release ();
5279 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5280 void debug_ops_vector (vec<operand_entry *> ops);
5282 /* Dump the operand entry vector OPS to FILE. */
5284 void
5285 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5287 operand_entry *oe;
5288 unsigned int i;
5290 FOR_EACH_VEC_ELT (ops, i, oe)
5292 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5293 print_generic_expr (file, oe->op, 0);
5294 fprintf (file, "\n");
5298 /* Dump the operand entry vector OPS to STDERR. */
5300 DEBUG_FUNCTION void
5301 debug_ops_vector (vec<operand_entry *> ops)
5303 dump_ops_vector (stderr, ops);
5306 /* Bubble up return status from reassociate_bb. */
5308 static bool
5309 do_reassoc (void)
5311 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5312 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5315 /* Initialize the reassociation pass. */
5317 static void
5318 init_reassoc (void)
5320 int i;
5321 long rank = 2;
5322 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5324 /* Find the loops, so that we can prevent moving calculations in
5325 them. */
5326 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5328 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5330 next_operand_entry_id = 0;
5332 /* Reverse RPO (Reverse Post Order) will give us something where
5333 deeper loops come later. */
5334 pre_and_rev_post_order_compute (NULL, bbs, false);
5335 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5336 operand_rank = new hash_map<tree, long>;
5338 /* Give each default definition a distinct rank. This includes
5339 parameters and the static chain. Walk backwards over all
5340 SSA names so that we get proper rank ordering according
5341 to tree_swap_operands_p. */
5342 for (i = num_ssa_names - 1; i > 0; --i)
5344 tree name = ssa_name (i);
5345 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5346 insert_operand_rank (name, ++rank);
5349 /* Set up rank for each BB */
5350 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5351 bb_rank[bbs[i]] = ++rank << 16;
5353 free (bbs);
5354 calculate_dominance_info (CDI_POST_DOMINATORS);
5355 plus_negates = vNULL;
5358 /* Cleanup after the reassociation pass, and print stats if
5359 requested. */
5361 static void
5362 fini_reassoc (void)
5364 statistics_counter_event (cfun, "Linearized",
5365 reassociate_stats.linearized);
5366 statistics_counter_event (cfun, "Constants eliminated",
5367 reassociate_stats.constants_eliminated);
5368 statistics_counter_event (cfun, "Ops eliminated",
5369 reassociate_stats.ops_eliminated);
5370 statistics_counter_event (cfun, "Statements rewritten",
5371 reassociate_stats.rewritten);
5372 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5373 reassociate_stats.pows_encountered);
5374 statistics_counter_event (cfun, "Built-in powi calls created",
5375 reassociate_stats.pows_created);
5377 delete operand_rank;
5378 operand_entry_pool.release ();
5379 free (bb_rank);
5380 plus_negates.release ();
5381 free_dominance_info (CDI_POST_DOMINATORS);
5382 loop_optimizer_finalize ();
5385 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5386 insertion of __builtin_powi calls.
5388 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5389 optimization of a gimple conditional. Otherwise returns zero. */
5391 static unsigned int
5392 execute_reassoc (bool insert_powi_p)
5394 reassoc_insert_powi_p = insert_powi_p;
5396 init_reassoc ();
5398 bool cfg_cleanup_needed;
5399 cfg_cleanup_needed = do_reassoc ();
5400 repropagate_negates ();
5401 branch_fixup ();
5403 fini_reassoc ();
5404 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5407 namespace {
5409 const pass_data pass_data_reassoc =
5411 GIMPLE_PASS, /* type */
5412 "reassoc", /* name */
5413 OPTGROUP_NONE, /* optinfo_flags */
5414 TV_TREE_REASSOC, /* tv_id */
5415 ( PROP_cfg | PROP_ssa ), /* properties_required */
5416 0, /* properties_provided */
5417 0, /* properties_destroyed */
5418 0, /* todo_flags_start */
5419 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5422 class pass_reassoc : public gimple_opt_pass
5424 public:
5425 pass_reassoc (gcc::context *ctxt)
5426 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5429 /* opt_pass methods: */
5430 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5431 void set_pass_param (unsigned int n, bool param)
5433 gcc_assert (n == 0);
5434 insert_powi_p = param;
5436 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5437 virtual unsigned int execute (function *)
5438 { return execute_reassoc (insert_powi_p); }
5440 private:
5441 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5442 point 3a in the pass header comment. */
5443 bool insert_powi_p;
5444 }; // class pass_reassoc
5446 } // anon namespace
5448 gimple_opt_pass *
5449 make_pass_reassoc (gcc::context *ctxt)
5451 return new pass_reassoc (ctxt);