1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
47 It consists of five steps:
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
67 5. Repropagate negates, as nothing else will clean it up ATM.
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
88 a (1), b (1), c (1), d (2), e (2)
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
94 You want to first merge all leaves of the same rank, as much as
95 possible.
97 So first build a binary op of
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
109 Then build a binary op of d + e
110 mergetmp2 = d + e
112 and put mergetmp2 on the merge worklist.
114 so merge worklist = {mergetmp, c, mergetmp2}
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
119 So we have
121 build binary op
122 mergetmp3 = mergetmp + c
124 worklist = {mergetmp2, mergetmp3}
126 mergetmp4 = mergetmp2 + mergetmp3
128 worklist = {mergetmp4}
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
139 So why don't we do this?
141 Because it's expensive, and rarely will help. Most trees we are
142 reassociating have 3 or fewer ops. If they have 2 ops, they already
143 will be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
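/* An illustrative, source-level sketch of the rank-guided rewrite
   described above.  This is not part of the pass; the function names are
   hypothetical and exist only for this example.  Pairing the equal-rank
   leaves first (a, b, c of rank 1 and d, e of rank 2 in the worked
   example) exposes the partial sums a + b and d + e as separate binary
   operations that redundancy elimination can reuse.  */

int
reassoc_sketch_before (int a, int b, int c, int d, int e)
{
  /* Step 2 would left-linearize this into ((((a + b) + c) + d) + e).  */
  return (a + b) + (c + (d + e));
}

int
reassoc_sketch_after (int a, int b, int c, int d, int e)
{
  int mergetmp = a + b;		/* Equal-rank leaves merged first.  */
  int mergetmp2 = d + e;
  int mergetmp3 = mergetmp + c;
  return mergetmp2 + mergetmp3;	/* Same value, different shape.  */
}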
165 /* Statistics */
166 static struct
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
176 /* Operator, rank pair. */
177 typedef struct operand_entry
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
185 static alloc_pool operand_entry_pool;
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
199 /* Forward decls. */
200 static long get_rank (tree);
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
268 gimple phi_stmt;
269 long block_rank;
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
288 return false;
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
298 long op_rank;
300 if (loop_carried_phi (op))
301 return rank;
303 op_rank = get_rank (op);
305 return MAX (rank, op_rank);
308 /* Look up the operand rank structure for expression E. */
310 static inline long
311 find_operand_rank (tree e)
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
317 /* Insert {E,RANK} into the operand rank hashtable. */
319 static inline void
320 insert_operand_rank (tree e, long rank)
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
329 /* Given an expression E, return the rank of the expression. */
331 static long
332 get_rank (tree e)
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
338 /* SSA_NAME's have the rank of the expression they are the result
339 of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346 I make no claims that this is optimal; however, it gives good
347 results. */
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
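/* A source-level sketch of this accumulator pattern follows this
   function below.  */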
375 if (TREE_CODE (e) == SSA_NAME)
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
410 for (i = 0; i < n; i++)
412 op = TREE_OPERAND (rhs, i);
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
419 else
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
430 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
442 /* Globals, etc, are rank 0 */
443 return 0;
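/* An illustrative, source-level sketch of the accumulator bias described
   inside get_rank above.  Not part of the pass; function names are
   hypothetical.  Biasing the rank of the loop-carried phi makes it the
   last addend, so the work of one iteration no longer forms a single
   serial dependence chain and unrolled iterations can be interleaved.  */

int
accum_sketch_before (const int *a, const int *d, const int *e, int n)
{
  int i, x = 0;
  for (i = 0; i < n; i++)
    {
      int b = a[i] + x;		/* Depends on the previous iteration's x.  */
      int c = b + d[i];
      x = c + e[i];
    }
  return x;
}

int
accum_sketch_after (const int *a, const int *d, const int *e, int n)
{
  int i, x = 0;
  for (i = 0; i < n; i++)
    {
      int b = a[i] + d[i];	/* Independent of x.  */
      int c = b + e[i];
      x = c + x;		/* Loop-carried value added last.  */
    }
  return x;
}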
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474 /* It's nicer for optimize_expression if constants that are likely
475 to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
505 /* Add an operand entry to *OPS for the tree operand OP. */
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
534 reassociate_stats.pows_encountered++;
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 basic_block bb = gimple_bb (stmt);
545 if (gimple_bb (stmt) == NULL)
546 return false;
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
556 return false;
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561 operand of that operation. Otherwise, return NULL_TREE. */
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
566 gimple stmt = SSA_NAME_DEF_STMT (name);
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
589 /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
594 if (last && last->op == curr->op)
596 switch (opcode)
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
615 return true;
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
627 reassociate_stats.ops_eliminated += 2;
629 if (ops->length () == 2)
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
635 else
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
641 return true;
643 default:
644 break;
647 return false;
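/* Source-level sketch of the pairs removed by eliminate_duplicate_pair
   above.  Illustrative only; function names are hypothetical.  For &, |,
   min and max a repeated operand is redundant; for ^ the pair cancels
   to zero.  */

int
dup_pair_sketch_before (int a, int b)
{
  return ((a & a) | b) + (a ^ a ^ b);
}

int
dup_pair_sketch_after (int a, int b)
{
  return (a | b) + b;
}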
650 static vec<tree> plus_negates;
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
686 if (oe->op == negateop)
689 if (dump_file && (dump_flags & TDF_DETAILS))
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
703 return true;
705 else if (oe->op == notop)
707 tree op_type = TREE_TYPE (oe->op);
709 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
723 return true;
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
732 return false;
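/* Source-level sketch of the cancellations performed by
   eliminate_plus_minus_pair above.  Illustrative only; function names
   are hypothetical.  a + -a folds to 0, and a + ~a folds to -1 in a
   two's complement type.  */

int
plus_minus_sketch_before (int a, int b)
{
  return (b + -a + a) + (b + ~a + a);
}

int
plus_minus_sketch_after (int a, int b)
{
  return b + (b + -1);
}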
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736 bitwise not expression, look in OPS for a corresponding operand to
737 cancel it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0, and return true. Otherwise, return
739 false. */
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
768 if (oe->op == notop)
770 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
798 return false;
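/* Source-level sketch of the identities used by eliminate_not_pairs
   above.  Illustrative only; function names are hypothetical.  x & ~x
   is always 0 and x | ~x is always all ones, so each chain collapses to
   a constant.  */

unsigned int
not_pair_sketch_before (unsigned int x, unsigned int y)
{
  return (x & ~x & y) + (x | ~x | y);
}

unsigned int
not_pair_sketch_after (unsigned int x, unsigned int y)
{
  return 0u + ~0u;
}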
801 /* Use a constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 switch (opcode)
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
823 if (ops->length () != 1)
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
828 reassociate_stats.ops_eliminated += ops->length () - 1;
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
835 else if (integer_all_onesp (oelast->op))
837 if (ops->length () != 1)
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
849 if (ops->length () != 1)
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
854 reassociate_stats.ops_eliminated += ops->length () - 1;
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
861 else if (integer_zerop (oelast->op))
863 if (ops->length () != 1)
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
879 if (ops->length () != 1)
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
895 if (ops->length () != 1)
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
923 break;
924 default:
925 break;
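/* Source-level sketch of the constant identities handled by
   eliminate_using_constants above.  Illustrative only; function names
   are hypothetical.  A 0 in an & chain or a -1 in a | chain absorbs the
   other operands, while * 1 and + 0 simply drop out.  */

int
using_constants_sketch_before (int a, int b)
{
  return (a & 0) + (b | -1) + (a * 1) + (b + 0);
}

int
using_constants_sketch_after (int a, int b)
{
  return 0 + -1 + a + b;
}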
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
946 /* Hash function for oecount. */
948 static hashval_t
949 oecount_hash (const void *p)
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
955 /* Comparison function for oecount. */
957 static int
958 oecount_eq (const void *p1, const void *p2)
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
966 /* Comparison function for qsort sorting oecount elements by count. */
968 static int
969 oecount_cmp (const void *p1, const void *p2)
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
986 tree fndecl;
988 if (!is_gimple_call (stmt))
989 return false;
991 fndecl = gimple_call_fndecl (stmt);
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1003 default:
1004 return false;
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1035 default:
1036 gcc_unreachable ();
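/* Source-level sketch of how repeated factors become __builtin_powi
   calls (step 3a in the overview) and of what decrement_power above does
   to such a call.  Illustrative only; function names are hypothetical,
   and the multiply-to-powi rewrite itself is only done under unsafe
   math optimizations.  */

double
powi_sketch_before (double x)
{
  return x * x * x * x * x * x;
}

double
powi_sketch_after (double x)
{
  /* decrement_power would turn this into __builtin_powi (x, 5) when one
     factor of x is peeled off.  */
  return __builtin_powi (x, 6);
}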
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 unlink_stmt_vdef (stmt);
1066 gsi_remove (&gsi, true);
1067 release_defs (stmt);
1070 /* Walks the linear chain with result *DEF searching for an operation
1071 with operand OP and code OPCODE, removing it from the chain. *DEF
1072 is updated if there is only one operand but no operation left. */
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1077 gimple stmt = SSA_NAME_DEF_STMT (*def);
1081 tree name;
1083 if (opcode == MULT_EXPR
1084 && stmt_is_power_of_op (stmt, op))
1086 if (decrement_power (stmt) == 1)
1087 propagate_op_to_single_use (op, stmt, def);
1088 return;
1091 name = gimple_assign_rhs1 (stmt);
1093 /* If this is the operation we look for and one of the operands
1094 is ours simply propagate the other operand into the stmts
1095 single use. */
1096 if (gimple_assign_rhs_code (stmt) == opcode
1097 && (name == op
1098 || gimple_assign_rhs2 (stmt) == op))
1100 if (name == op)
1101 name = gimple_assign_rhs2 (stmt);
1102 propagate_op_to_single_use (name, stmt, def);
1103 return;
1106 /* We might have a multiply of two __builtin_pow* calls, and
1107 the operand might be hiding in the rightmost one. */
1108 if (opcode == MULT_EXPR
1109 && gimple_assign_rhs_code (stmt) == opcode
1110 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1111 && has_single_use (gimple_assign_rhs2 (stmt)))
1113 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1114 if (stmt_is_power_of_op (stmt2, op))
1116 if (decrement_power (stmt2) == 1)
1117 propagate_op_to_single_use (op, stmt2, def);
1118 return;
1122 /* Continue walking the chain. */
1123 gcc_assert (name != op
1124 && TREE_CODE (name) == SSA_NAME);
1125 stmt = SSA_NAME_DEF_STMT (name);
1127 while (1);
1130 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1131 of TYPE for the result. Places the statement after the definition of either
1132 OP1 or OP2. Returns the new statement. */
1134 static gimple
1135 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1137 gimple op1def = NULL, op2def = NULL;
1138 gimple_stmt_iterator gsi;
1139 tree op;
1140 gimple sum;
1142 /* Create the addition statement. */
1143 op = make_ssa_name (type, NULL);
1144 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1146 /* Find an insertion place and insert. */
1147 if (TREE_CODE (op1) == SSA_NAME)
1148 op1def = SSA_NAME_DEF_STMT (op1);
1149 if (TREE_CODE (op2) == SSA_NAME)
1150 op2def = SSA_NAME_DEF_STMT (op2);
1151 if ((!op1def || gimple_nop_p (op1def))
1152 && (!op2def || gimple_nop_p (op2def)))
1154 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1155 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1157 else if ((!op1def || gimple_nop_p (op1def))
1158 || (op2def && !gimple_nop_p (op2def)
1159 && stmt_dominates_stmt_p (op1def, op2def)))
1161 if (gimple_code (op2def) == GIMPLE_PHI)
1163 gsi = gsi_after_labels (gimple_bb (op2def));
1164 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1166 else
1168 if (!stmt_ends_bb_p (op2def))
1170 gsi = gsi_for_stmt (op2def);
1171 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1173 else
1175 edge e;
1176 edge_iterator ei;
1178 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1179 if (e->flags & EDGE_FALLTHRU)
1180 gsi_insert_on_edge_immediate (e, sum);
1184 else
1186 if (gimple_code (op1def) == GIMPLE_PHI)
1188 gsi = gsi_after_labels (gimple_bb (op1def));
1189 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1191 else
1193 if (!stmt_ends_bb_p (op1def))
1195 gsi = gsi_for_stmt (op1def);
1196 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1198 else
1200 edge e;
1201 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1204 if (e->flags & EDGE_FALLTHRU)
1205 gsi_insert_on_edge_immediate (e, sum);
1209 update_stmt (sum);
1211 return sum;
1214 /* Perform un-distribution of divisions and multiplications.
1215 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1216 to (A + B) / X for real X.
1218 The algorithm is organized as follows.
1220 - First we walk the addition chain *OPS looking for summands that
1221 are defined by a multiplication or a real division. This results
1222 in the candidates bitmap with relevant indices into *OPS.
1224 - Second we build the chains of multiplications or divisions for
1225 these candidates, counting the number of occurrences of (operand, code)
1226 pairs in all of the candidate chains.
1228 - Third we sort the (operand, code) pairs by number of occurrences and
1229 process them starting with the pair with the most uses.
1231 * For each such pair we walk the candidates again to build a
1232 second candidate bitmap noting all multiplication/division chains
1233 that have at least one occurrence of (operand, code).
1235 * We build an alternate addition chain only covering these
1236 candidates with one (operand, code) operation removed from their
1237 multiplication/division chain.
1239 * The first candidate gets replaced by the alternate addition chain
1240 multiplied/divided by the operand.
1242 * All candidate chains get disabled for further processing and
1243 processing of (operand, code) pairs continues.
1245 The alternate addition chains built are re-processed by the main
1246 reassociation algorithm which allows optimizing a * x * y + b * y * x
1247 to (a + b) * x * y in one invocation of the reassociation pass. */
1249 static bool
1250 undistribute_ops_list (enum tree_code opcode,
1251 vec<operand_entry_t> *ops, struct loop *loop)
1253 unsigned int length = ops->length ();
1254 operand_entry_t oe1;
1255 unsigned i, j;
1256 sbitmap candidates, candidates2;
1257 unsigned nr_candidates, nr_candidates2;
1258 sbitmap_iterator sbi0;
1259 vec<operand_entry_t> *subops;
1260 htab_t ctable;
1261 bool changed = false;
1262 int next_oecount_id = 0;
1264 if (length <= 1
1265 || opcode != PLUS_EXPR)
1266 return false;
1268 /* Build a list of candidates to process. */
1269 candidates = sbitmap_alloc (length);
1270 bitmap_clear (candidates);
1271 nr_candidates = 0;
1272 FOR_EACH_VEC_ELT (*ops, i, oe1)
1274 enum tree_code dcode;
1275 gimple oe1def;
1277 if (TREE_CODE (oe1->op) != SSA_NAME)
1278 continue;
1279 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1280 if (!is_gimple_assign (oe1def))
1281 continue;
1282 dcode = gimple_assign_rhs_code (oe1def);
1283 if ((dcode != MULT_EXPR
1284 && dcode != RDIV_EXPR)
1285 || !is_reassociable_op (oe1def, dcode, loop))
1286 continue;
1288 bitmap_set_bit (candidates, i);
1289 nr_candidates++;
1292 if (nr_candidates < 2)
1294 sbitmap_free (candidates);
1295 return false;
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "searching for un-distribute opportunities ");
1301 print_generic_expr (dump_file,
1302 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1303 fprintf (dump_file, " %d\n", nr_candidates);
1306 /* Build linearized sub-operand lists and the counting table. */
1307 cvec.create (0);
1308 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1309 /* ??? Macro arguments cannot have multi-argument template types in
1310 them. This typedef is needed to workaround that limitation. */
1311 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1312 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1313 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1315 gimple oedef;
1316 enum tree_code oecode;
1317 unsigned j;
1319 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1320 oecode = gimple_assign_rhs_code (oedef);
1321 linearize_expr_tree (&subops[i], oedef,
1322 associative_tree_code (oecode), false);
1324 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1326 oecount c;
1327 void **slot;
1328 size_t idx;
1329 c.oecode = oecode;
1330 c.cnt = 1;
1331 c.id = next_oecount_id++;
1332 c.op = oe1->op;
1333 cvec.safe_push (c);
1334 idx = cvec.length () + 41;
1335 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1336 if (!*slot)
1338 *slot = (void *)idx;
1340 else
1342 cvec.pop ();
1343 cvec[(size_t)*slot - 42].cnt++;
1347 htab_delete (ctable);
1349 /* Sort the counting table. */
1350 cvec.qsort (oecount_cmp);
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1354 oecount *c;
1355 fprintf (dump_file, "Candidates:\n");
1356 FOR_EACH_VEC_ELT (cvec, j, c)
1358 fprintf (dump_file, " %u %s: ", c->cnt,
1359 c->oecode == MULT_EXPR
1360 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1361 print_generic_expr (dump_file, c->op, 0);
1362 fprintf (dump_file, "\n");
1366 /* Process the (operand, code) pairs in order of most occurrences. */
1367 candidates2 = sbitmap_alloc (length);
1368 while (!cvec.is_empty ())
1370 oecount *c = &cvec.last ();
1371 if (c->cnt < 2)
1372 break;
1374 /* Now collect the operands in the outer chain that contain
1375 the common operand in their inner chain. */
1376 bitmap_clear (candidates2);
1377 nr_candidates2 = 0;
1378 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1380 gimple oedef;
1381 enum tree_code oecode;
1382 unsigned j;
1383 tree op = (*ops)[i]->op;
1385 /* If we undistributed in this chain already this may be
1386 a constant. */
1387 if (TREE_CODE (op) != SSA_NAME)
1388 continue;
1390 oedef = SSA_NAME_DEF_STMT (op);
1391 oecode = gimple_assign_rhs_code (oedef);
1392 if (oecode != c->oecode)
1393 continue;
1395 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1397 if (oe1->op == c->op)
1399 bitmap_set_bit (candidates2, i);
1400 ++nr_candidates2;
1401 break;
1406 if (nr_candidates2 >= 2)
1408 operand_entry_t oe1, oe2;
1409 gimple prod;
1410 int first = bitmap_first_set_bit (candidates2);
1412 /* Build the new addition chain. */
1413 oe1 = (*ops)[first];
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "Building (");
1417 print_generic_expr (dump_file, oe1->op, 0);
1419 zero_one_operation (&oe1->op, c->oecode, c->op);
1420 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1422 gimple sum;
1423 oe2 = (*ops)[i];
1424 if (dump_file && (dump_flags & TDF_DETAILS))
1426 fprintf (dump_file, " + ");
1427 print_generic_expr (dump_file, oe2->op, 0);
1429 zero_one_operation (&oe2->op, c->oecode, c->op);
1430 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1431 oe1->op, oe2->op, opcode);
1432 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1433 oe2->rank = 0;
1434 oe1->op = gimple_get_lhs (sum);
1437 /* Apply the multiplication/division. */
1438 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1439 oe1->op, c->op, c->oecode);
1440 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1443 print_generic_expr (dump_file, c->op, 0);
1444 fprintf (dump_file, "\n");
1447 /* Record it in the addition chain and disable further
1448 undistribution with this op. */
1449 oe1->op = gimple_assign_lhs (prod);
1450 oe1->rank = get_rank (oe1->op);
1451 subops[first].release ();
1453 changed = true;
1456 cvec.pop ();
1459 for (i = 0; i < ops->length (); ++i)
1460 subops[i].release ();
1461 free (subops);
1462 cvec.release ();
1463 sbitmap_free (candidates);
1464 sbitmap_free (candidates2);
1466 return changed;
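/* Source-level sketch of the un-distribution performed by
   undistribute_ops_list above, matching the example from its comment.
   Illustrative only; function names are hypothetical.  */

int
undistribute_sketch_before (int a, int b, int x, int y)
{
  return a * x * y + b * y * x;
}

int
undistribute_sketch_after (int a, int b, int x, int y)
{
  return (a + b) * x * y;
}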
1469 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1470 expression, examine the other OPS to see if any of them are comparisons
1471 of the same values, which we may be able to combine or eliminate.
1472 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1474 static bool
1475 eliminate_redundant_comparison (enum tree_code opcode,
1476 vec<operand_entry_t> *ops,
1477 unsigned int currindex,
1478 operand_entry_t curr)
1480 tree op1, op2;
1481 enum tree_code lcode, rcode;
1482 gimple def1, def2;
1483 int i;
1484 operand_entry_t oe;
1486 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1487 return false;
1489 /* Check that CURR is a comparison. */
1490 if (TREE_CODE (curr->op) != SSA_NAME)
1491 return false;
1492 def1 = SSA_NAME_DEF_STMT (curr->op);
1493 if (!is_gimple_assign (def1))
1494 return false;
1495 lcode = gimple_assign_rhs_code (def1);
1496 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1497 return false;
1498 op1 = gimple_assign_rhs1 (def1);
1499 op2 = gimple_assign_rhs2 (def1);
1501 /* Now look for a similar comparison in the remaining OPS. */
1502 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1504 tree t;
1506 if (TREE_CODE (oe->op) != SSA_NAME)
1507 continue;
1508 def2 = SSA_NAME_DEF_STMT (oe->op);
1509 if (!is_gimple_assign (def2))
1510 continue;
1511 rcode = gimple_assign_rhs_code (def2);
1512 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1513 continue;
1515 /* If we got here, we have a match. See if we can combine the
1516 two comparisons. */
1517 if (opcode == BIT_IOR_EXPR)
1518 t = maybe_fold_or_comparisons (lcode, op1, op2,
1519 rcode, gimple_assign_rhs1 (def2),
1520 gimple_assign_rhs2 (def2));
1521 else
1522 t = maybe_fold_and_comparisons (lcode, op1, op2,
1523 rcode, gimple_assign_rhs1 (def2),
1524 gimple_assign_rhs2 (def2));
1525 if (!t)
1526 continue;
1528 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1529 always give us a boolean_type_node value back. If the original
1530 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1531 we need to convert. */
1532 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1533 t = fold_convert (TREE_TYPE (curr->op), t);
1535 if (TREE_CODE (t) != INTEGER_CST
1536 && !operand_equal_p (t, curr->op, 0))
1538 enum tree_code subcode;
1539 tree newop1, newop2;
1540 if (!COMPARISON_CLASS_P (t))
1541 continue;
1542 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1543 STRIP_USELESS_TYPE_CONVERSION (newop1);
1544 STRIP_USELESS_TYPE_CONVERSION (newop2);
1545 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1546 continue;
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, "Equivalence: ");
1552 print_generic_expr (dump_file, curr->op, 0);
1553 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1554 print_generic_expr (dump_file, oe->op, 0);
1555 fprintf (dump_file, " -> ");
1556 print_generic_expr (dump_file, t, 0);
1557 fprintf (dump_file, "\n");
1560 /* Now we can delete oe, as it has been subsumed by the new combined
1561 expression t. */
1562 ops->ordered_remove (i);
1563 reassociate_stats.ops_eliminated ++;
1565 /* If t is the same as curr->op, we're done. Otherwise we must
1566 replace curr->op with t. A special case is when we get a constant
1567 back, in which case we add it to the end instead of in place of
1568 the current entry. */
1569 if (TREE_CODE (t) == INTEGER_CST)
1571 ops->ordered_remove (currindex);
1572 add_to_ops_vec (ops, t);
1574 else if (!operand_equal_p (t, curr->op, 0))
1576 gimple sum;
1577 enum tree_code subcode;
1578 tree newop1;
1579 tree newop2;
1580 gcc_assert (COMPARISON_CLASS_P (t));
1581 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1582 STRIP_USELESS_TYPE_CONVERSION (newop1);
1583 STRIP_USELESS_TYPE_CONVERSION (newop2);
1584 gcc_checking_assert (is_gimple_val (newop1)
1585 && is_gimple_val (newop2));
1586 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1587 curr->op = gimple_get_lhs (sum);
1589 return true;
1592 return false;
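/* Source-level sketch of the comparison merging done by
   eliminate_redundant_comparison above, using the example from its
   comment.  Illustrative only; function names are hypothetical.  */

int
redundant_cmp_sketch_before (int a, int b)
{
  return (a < b) | (a == b);
}

int
redundant_cmp_sketch_after (int a, int b)
{
  return a <= b;
}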
1595 /* Perform various identities and other optimizations on the list of
1596 operand entries, stored in OPS. The tree code for the binary
1597 operation between all the operands is OPCODE. */
1599 static void
1600 optimize_ops_list (enum tree_code opcode,
1601 vec<operand_entry_t> *ops)
1603 unsigned int length = ops->length ();
1604 unsigned int i;
1605 operand_entry_t oe;
1606 operand_entry_t oelast = NULL;
1607 bool iterate = false;
1609 if (length == 1)
1610 return;
1612 oelast = ops->last ();
1614 /* If the last two are constants, pop the constants off, merge them
1615 and try the next two. */
1616 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1618 operand_entry_t oelm1 = (*ops)[length - 2];
1620 if (oelm1->rank == 0
1621 && is_gimple_min_invariant (oelm1->op)
1622 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1623 TREE_TYPE (oelast->op)))
1625 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1626 oelm1->op, oelast->op);
1628 if (folded && is_gimple_min_invariant (folded))
1630 if (dump_file && (dump_flags & TDF_DETAILS))
1631 fprintf (dump_file, "Merging constants\n");
1633 ops->pop ();
1634 ops->pop ();
1636 add_to_ops_vec (ops, folded);
1637 reassociate_stats.constants_eliminated++;
1639 optimize_ops_list (opcode, ops);
1640 return;
1645 eliminate_using_constants (opcode, ops);
1646 oelast = NULL;
1648 for (i = 0; ops->iterate (i, &oe);)
1650 bool done = false;
1652 if (eliminate_not_pairs (opcode, ops, i, oe))
1653 return;
1654 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1655 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1656 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1658 if (done)
1659 return;
1660 iterate = true;
1661 oelast = NULL;
1662 continue;
1664 oelast = oe;
1665 i++;
1668 length = ops->length ();
1669 oelast = ops->last ();
1671 if (iterate)
1672 optimize_ops_list (opcode, ops);
1675 /* The following functions are subroutines to optimize_range_tests and allow
1676 it to try to change a logical combination of comparisons into a range
1677 test.
1679 For example, both
1680 X == 2 || X == 5 || X == 3 || X == 4
1681 and
1682 X >= 2 && X <= 5
1683 are converted to
1684 (unsigned) (X - 2) <= 3
1686 For more information see comments above fold_test_range in fold-const.c,
1687 this implementation is for GIMPLE. */
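/* Source-level sketch of the range-test rewrite described above.
   Illustrative only; function names are hypothetical.  Because the
   subtraction is done in an unsigned type, values below 2 wrap around
   and fail the single comparison.  */

int
range_test_sketch_before (unsigned int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;
}

int
range_test_sketch_after (unsigned int x)
{
  return (x - 2) <= 3;
}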
1689 struct range_entry
1691 tree exp;
1692 tree low;
1693 tree high;
1694 bool in_p;
1695 bool strict_overflow_p;
1696 unsigned int idx, next;
1699 /* This is similar to make_range in fold-const.c, but on top of
1700 GIMPLE instead of trees. If EXP is non-NULL, it should be
1701 an SSA_NAME and STMT argument is ignored, otherwise STMT
1702 argument should be a GIMPLE_COND. */
1704 static void
1705 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1707 int in_p;
1708 tree low, high;
1709 bool is_bool, strict_overflow_p;
1711 r->exp = NULL_TREE;
1712 r->in_p = false;
1713 r->strict_overflow_p = false;
1714 r->low = NULL_TREE;
1715 r->high = NULL_TREE;
1716 if (exp != NULL_TREE
1717 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1718 return;
1720 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1721 and see if we can refine the range. Some of the cases below may not
1722 happen, but it doesn't seem worth worrying about this. We "continue"
1723 the outer loop when we've changed something; otherwise we "break"
1724 the switch, which will "break" the while. */
1725 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1726 high = low;
1727 in_p = 0;
1728 strict_overflow_p = false;
1729 is_bool = false;
1730 if (exp == NULL_TREE)
1731 is_bool = true;
1732 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1734 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1735 is_bool = true;
1736 else
1737 return;
1739 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1740 is_bool = true;
1742 while (1)
1744 enum tree_code code;
1745 tree arg0, arg1, exp_type;
1746 tree nexp;
1747 location_t loc;
1749 if (exp != NULL_TREE)
1751 if (TREE_CODE (exp) != SSA_NAME)
1752 break;
1754 stmt = SSA_NAME_DEF_STMT (exp);
1755 if (!is_gimple_assign (stmt))
1756 break;
1758 code = gimple_assign_rhs_code (stmt);
1759 arg0 = gimple_assign_rhs1 (stmt);
1760 arg1 = gimple_assign_rhs2 (stmt);
1761 exp_type = TREE_TYPE (exp);
1763 else
1765 code = gimple_cond_code (stmt);
1766 arg0 = gimple_cond_lhs (stmt);
1767 arg1 = gimple_cond_rhs (stmt);
1768 exp_type = boolean_type_node;
1771 if (TREE_CODE (arg0) != SSA_NAME)
1772 break;
1773 loc = gimple_location (stmt);
1774 switch (code)
1776 case BIT_NOT_EXPR:
1777 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1779 in_p = !in_p;
1780 exp = arg0;
1781 continue;
1783 break;
1784 case SSA_NAME:
1785 exp = arg0;
1786 continue;
1787 CASE_CONVERT:
1788 if (is_bool)
1789 goto do_default;
1790 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1792 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1793 is_bool = true;
1794 else
1795 return;
1797 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1798 is_bool = true;
1799 goto do_default;
1800 case EQ_EXPR:
1801 case NE_EXPR:
1802 case LT_EXPR:
1803 case LE_EXPR:
1804 case GE_EXPR:
1805 case GT_EXPR:
1806 is_bool = true;
1807 /* FALLTHRU */
1808 default:
1809 if (!is_bool)
1810 return;
1811 do_default:
1812 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1813 &low, &high, &in_p,
1814 &strict_overflow_p);
1815 if (nexp != NULL_TREE)
1817 exp = nexp;
1818 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1819 continue;
1821 break;
1823 break;
1825 if (is_bool)
1827 r->exp = exp;
1828 r->in_p = in_p;
1829 r->low = low;
1830 r->high = high;
1831 r->strict_overflow_p = strict_overflow_p;
1835 /* Comparison function for qsort. Sort entries
1836 without SSA_NAME exp first, then with SSA_NAMEs sorted
1837 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1838 by increasing ->low and if ->low is the same, by increasing
1839 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1840 maximum. */
1842 static int
1843 range_entry_cmp (const void *a, const void *b)
1845 const struct range_entry *p = (const struct range_entry *) a;
1846 const struct range_entry *q = (const struct range_entry *) b;
1848 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1850 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1852 /* Group range_entries for the same SSA_NAME together. */
1853 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1854 return -1;
1855 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1856 return 1;
1857 /* If ->low is different, NULL low goes first, then by
1858 ascending low. */
1859 if (p->low != NULL_TREE)
1861 if (q->low != NULL_TREE)
1863 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1864 p->low, q->low);
1865 if (tem && integer_onep (tem))
1866 return -1;
1867 tem = fold_binary (GT_EXPR, boolean_type_node,
1868 p->low, q->low);
1869 if (tem && integer_onep (tem))
1870 return 1;
1872 else
1873 return 1;
1875 else if (q->low != NULL_TREE)
1876 return -1;
1877 /* If ->high is different, NULL high goes last, before that by
1878 ascending high. */
1879 if (p->high != NULL_TREE)
1881 if (q->high != NULL_TREE)
1883 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1884 p->high, q->high);
1885 if (tem && integer_onep (tem))
1886 return -1;
1887 tem = fold_binary (GT_EXPR, boolean_type_node,
1888 p->high, q->high);
1889 if (tem && integer_onep (tem))
1890 return 1;
1892 else
1893 return -1;
1895 else if (p->high != NULL_TREE)
1896 return 1;
1897 /* If both ranges are the same, sort below by ascending idx. */
1899 else
1900 return 1;
1902 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1903 return -1;
1905 if (p->idx < q->idx)
1906 return -1;
1907 else
1909 gcc_checking_assert (p->idx > q->idx);
1910 return 1;
1914 /* Helper routine of optimize_range_tests.
1915 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1916 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1917 OPCODE and OPS are arguments of optimize_range_tests. Return
1918 true if the range merge has been successful.
1919 If OPCODE is ERROR_MARK, this is called from within
1920 maybe_optimize_range_tests and is performing inter-bb range optimization.
1921 Changes should then be performed right away, and whether an op is
1922 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1924 static bool
1925 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1926 unsigned int count, enum tree_code opcode,
1927 vec<operand_entry_t> *ops, tree exp, bool in_p,
1928 tree low, tree high, bool strict_overflow_p)
1930 operand_entry_t oe = (*ops)[range->idx];
1931 tree op = oe->op;
1932 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1933 location_t loc = gimple_location (stmt);
1934 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1935 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1936 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1937 gimple_stmt_iterator gsi;
1939 if (tem == NULL_TREE)
1940 return false;
1942 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1943 warning_at (loc, OPT_Wstrict_overflow,
1944 "assuming signed overflow does not occur "
1945 "when simplifying range test");
1947 if (dump_file && (dump_flags & TDF_DETAILS))
1949 struct range_entry *r;
1950 fprintf (dump_file, "Optimizing range tests ");
1951 print_generic_expr (dump_file, range->exp, 0);
1952 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1953 print_generic_expr (dump_file, range->low, 0);
1954 fprintf (dump_file, ", ");
1955 print_generic_expr (dump_file, range->high, 0);
1956 fprintf (dump_file, "]");
1957 for (r = otherrange; r < otherrange + count; r++)
1959 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1960 print_generic_expr (dump_file, r->low, 0);
1961 fprintf (dump_file, ", ");
1962 print_generic_expr (dump_file, r->high, 0);
1963 fprintf (dump_file, "]");
1965 fprintf (dump_file, "\n into ");
1966 print_generic_expr (dump_file, tem, 0);
1967 fprintf (dump_file, "\n");
1970 if (opcode == BIT_IOR_EXPR
1971 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1972 tem = invert_truthvalue_loc (loc, tem);
1974 tem = fold_convert_loc (loc, optype, tem);
1975 gsi = gsi_for_stmt (stmt);
1976 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1977 GSI_SAME_STMT);
1979 /* If doing inter-bb range test optimization, update the
1980 stmts immediately. Start with changing the first range test
1981 immediate use to the new value (TEM), or, if the first range
1982 test is a GIMPLE_COND stmt, change that condition. */
1983 if (opcode == ERROR_MARK)
1985 if (op)
1987 imm_use_iterator iter;
1988 use_operand_p use_p;
1989 gimple use_stmt;
1991 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1993 if (is_gimple_debug (use_stmt))
1994 continue;
1995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1996 SET_USE (use_p, tem);
1997 update_stmt (use_stmt);
2000 else
2002 gimple_cond_set_code (stmt, NE_EXPR);
2003 gimple_cond_set_lhs (stmt, tem);
2004 gimple_cond_set_rhs (stmt, boolean_false_node);
2005 update_stmt (stmt);
2008 oe->op = tem;
2009 range->exp = exp;
2010 range->low = low;
2011 range->high = high;
2012 range->in_p = in_p;
2013 range->strict_overflow_p = false;
2015 for (range = otherrange; range < otherrange + count; range++)
2017 oe = (*ops)[range->idx];
2018 /* Now change all the other range test immediate uses, so that
2019 those tests will be optimized away. */
2020 if (opcode == ERROR_MARK)
2022 if (oe->op)
2024 imm_use_iterator iter;
2025 use_operand_p use_p;
2026 gimple use_stmt;
2028 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2030 if (is_gimple_debug (use_stmt))
2031 continue;
2032 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2033 adjust it into _7 = _9;. */
2034 if (is_gimple_assign (use_stmt)
2035 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2037 tree expr = NULL_TREE;
2038 if (oe->op == gimple_assign_rhs1 (use_stmt))
2039 expr = gimple_assign_rhs2 (use_stmt);
2040 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2041 expr = gimple_assign_rhs1 (use_stmt);
2042 if (expr
2043 && expr != oe->op
2044 && TREE_CODE (expr) == SSA_NAME)
2046 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2047 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2048 expr, NULL_TREE);
2049 update_stmt (use_stmt);
2050 continue;
2053 /* If imm use of _8 is a statement like _7 = (int) _8;,
2054 adjust it into _7 = 0; or _7 = 1;. */
2055 if (gimple_assign_cast_p (use_stmt)
2056 && oe->op == gimple_assign_rhs1 (use_stmt))
2058 tree lhs = gimple_assign_lhs (use_stmt);
2059 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2061 gimple_stmt_iterator gsi2
2062 = gsi_for_stmt (use_stmt);
2063 tree expr = build_int_cst (TREE_TYPE (lhs),
2064 oe->rank == BIT_IOR_EXPR
2065 ? 0 : 1);
2066 gimple_assign_set_rhs_with_ops (&gsi2,
2067 INTEGER_CST,
2068 expr, NULL_TREE);
2069 update_stmt (use_stmt);
2070 continue;
2073 /* Otherwise replace the use with 0 or 1. */
2074 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2075 SET_USE (use_p,
2076 build_int_cst (TREE_TYPE (oe->op),
2077 oe->rank == BIT_IOR_EXPR
2078 ? 0 : 1));
2079 update_stmt (use_stmt);
2082 else
2084 /* If range test was a GIMPLE_COND, simply change it
2085 into an always false or always true condition. */
2086 stmt = last_stmt (BASIC_BLOCK (oe->id));
2087 if (oe->rank == BIT_IOR_EXPR)
2088 gimple_cond_make_false (stmt);
2089 else
2090 gimple_cond_make_true (stmt);
2091 update_stmt (stmt);
2094 oe->op = error_mark_node;
2095 range->exp = NULL_TREE;
2097 return true;
2100 /* Optimize range tests, similarly to how fold_range_test optimizes
2101 them on trees. The tree code for the binary
2102 operation between all the operands is OPCODE.
2103 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2104 maybe_optimize_range_tests for inter-bb range optimization.
2105 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2106 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2107 the actual opcode. */
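/* For instance (purely illustrative, SSA names invented): the tests
   a_1 == 3 || a_1 == 4 can be merged by the loop below into a single
   range test for a_1 in [3, 4], which build_range_check typically
   emits as an unsigned subtract-and-compare.  */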
2109 static void
2110 optimize_range_tests (enum tree_code opcode,
2111 vec<operand_entry_t> *ops)
2113 unsigned int length = ops->length (), i, j, first;
2114 operand_entry_t oe;
2115 struct range_entry *ranges;
2116 bool any_changes = false;
2118 if (length == 1)
2119 return;
2121 ranges = XNEWVEC (struct range_entry, length);
2122 for (i = 0; i < length; i++)
2124 oe = (*ops)[i];
2125 ranges[i].idx = i;
2126 init_range_entry (ranges + i, oe->op,
2127 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2128 /* For |, invert it now; we will invert it again before emitting
2129 the optimized expression. */
2130 if (opcode == BIT_IOR_EXPR
2131 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2132 ranges[i].in_p = !ranges[i].in_p;
2135 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2136 for (i = 0; i < length; i++)
2137 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2138 break;
2140 /* Try to merge ranges. */
2141 for (first = i; i < length; i++)
2143 tree low = ranges[i].low;
2144 tree high = ranges[i].high;
2145 int in_p = ranges[i].in_p;
2146 bool strict_overflow_p = ranges[i].strict_overflow_p;
2147 int update_fail_count = 0;
2149 for (j = i + 1; j < length; j++)
2151 if (ranges[i].exp != ranges[j].exp)
2152 break;
2153 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2154 ranges[j].in_p, ranges[j].low, ranges[j].high))
2155 break;
2156 strict_overflow_p |= ranges[j].strict_overflow_p;
2159 if (j == i + 1)
2160 continue;
2162 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2163 ops, ranges[i].exp, in_p, low, high,
2164 strict_overflow_p))
2166 i = j - 1;
2167 any_changes = true;
2169 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2170 while update_range_test would fail. */
2171 else if (update_fail_count == 64)
2172 i = j - 1;
2173 else
2174 ++update_fail_count;
2177 /* Optimize X == CST1 || X == CST2
2178 if popcount (CST1 ^ CST2) == 1 into
2179 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2180 Similarly for ranges. E.g.
2181 X != 2 && X != 3 && X != 10 && X != 11
2182 will be transformed by the above loop into
2183 (X - 2U) <= 1U && (X - 10U) <= 1U
2184 and this loop can transform that into
2185 ((X & ~8) - 2U) <= 1U. */
2186 for (i = first; i < length; i++)
2188 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2190 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2191 continue;
2192 type = TREE_TYPE (ranges[i].exp);
2193 if (!INTEGRAL_TYPE_P (type))
2194 continue;
2195 lowi = ranges[i].low;
2196 if (lowi == NULL_TREE)
2197 lowi = TYPE_MIN_VALUE (type);
2198 highi = ranges[i].high;
2199 if (highi == NULL_TREE)
2200 continue;
2201 for (j = i + 1; j < length && j < i + 64; j++)
2203 if (ranges[j].exp == NULL_TREE)
2204 continue;
2205 if (ranges[i].exp != ranges[j].exp)
2206 break;
2207 if (ranges[j].in_p)
2208 continue;
2209 lowj = ranges[j].low;
2210 if (lowj == NULL_TREE)
2211 continue;
2212 highj = ranges[j].high;
2213 if (highj == NULL_TREE)
2214 highj = TYPE_MAX_VALUE (type);
2215 tem = fold_binary (GT_EXPR, boolean_type_node,
2216 lowj, highi);
2217 if (tem == NULL_TREE || !integer_onep (tem))
2218 continue;
2219 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2220 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2221 continue;
2222 gcc_checking_assert (!integer_zerop (lowxor));
2223 tem = fold_binary (MINUS_EXPR, type, lowxor,
2224 build_int_cst (type, 1));
2225 if (tem == NULL_TREE)
2226 continue;
2227 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2228 if (tem == NULL_TREE || !integer_zerop (tem))
2229 continue;
2230 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2231 if (!tree_int_cst_equal (lowxor, highxor))
2232 continue;
2233 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2234 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2235 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2236 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2237 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2238 ranges[i].in_p, lowj, highj,
2239 ranges[i].strict_overflow_p
2240 || ranges[j].strict_overflow_p))
2242 any_changes = true;
2243 break;
2248 if (any_changes && opcode != ERROR_MARK)
2250 j = 0;
2251 FOR_EACH_VEC_ELT (*ops, i, oe)
2253 if (oe->op == error_mark_node)
2254 continue;
2255 else if (i != j)
2256 (*ops)[j] = oe;
2257 j++;
2259 ops->truncate (j);
2262 XDELETEVEC (ranges);
2265 /* Return true if STMT is a cast like:
2266 <bb N>:
2268 _123 = (int) _234;
2270 <bb M>:
2271 # _345 = PHI <_123(N), 1(...), 1(...)>
2272 where _234 has bool type, _123 has a single use and
2273 bb N has a single successor M. This is commonly used in
2274 the last block of a range test. */
2276 static bool
2277 final_range_test_p (gimple stmt)
2279 basic_block bb, rhs_bb;
2280 edge e;
2281 tree lhs, rhs;
2282 use_operand_p use_p;
2283 gimple use_stmt;
2285 if (!gimple_assign_cast_p (stmt))
2286 return false;
2287 bb = gimple_bb (stmt);
2288 if (!single_succ_p (bb))
2289 return false;
2290 e = single_succ_edge (bb);
2291 if (e->flags & EDGE_COMPLEX)
2292 return false;
2294 lhs = gimple_assign_lhs (stmt);
2295 rhs = gimple_assign_rhs1 (stmt);
2296 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2297 || TREE_CODE (rhs) != SSA_NAME
2298 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2299 return false;
2301 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2302 if (!single_imm_use (lhs, &use_p, &use_stmt))
2303 return false;
2305 if (gimple_code (use_stmt) != GIMPLE_PHI
2306 || gimple_bb (use_stmt) != e->dest)
2307 return false;
2309 /* And that the rhs is defined in the same loop. */
2310 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2311 if (rhs_bb == NULL
2312 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2313 return false;
2315 return true;
2318 /* Return true if BB is a suitable basic block for inter-bb range test
2319 optimization. If BACKWARD is true, BB should be the only predecessor
2320 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2321 or compared against to find a common basic block to which all conditions
2322 branch if true resp. false. If BACKWARD is false, TEST_BB should
2323 be the only predecessor of BB. */
2325 static bool
2326 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2327 bool backward)
2329 edge_iterator ei, ei2;
2330 edge e, e2;
2331 gimple stmt;
2332 gimple_stmt_iterator gsi;
2333 bool other_edge_seen = false;
2334 bool is_cond;
2336 if (test_bb == bb)
2337 return false;
2338 /* Check last stmt first. */
2339 stmt = last_stmt (bb);
2340 if (stmt == NULL
2341 || (gimple_code (stmt) != GIMPLE_COND
2342 && (backward || !final_range_test_p (stmt)))
2343 || gimple_visited_p (stmt)
2344 || stmt_could_throw_p (stmt)
2345 || *other_bb == bb)
2346 return false;
2347 is_cond = gimple_code (stmt) == GIMPLE_COND;
2348 if (is_cond)
2350 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2351 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2352 to *OTHER_BB (if not set yet, try to find it out). */
2353 if (EDGE_COUNT (bb->succs) != 2)
2354 return false;
2355 FOR_EACH_EDGE (e, ei, bb->succs)
2357 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2358 return false;
2359 if (e->dest == test_bb)
2361 if (backward)
2362 continue;
2363 else
2364 return false;
2366 if (e->dest == bb)
2367 return false;
2368 if (*other_bb == NULL)
2370 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2371 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2372 return false;
2373 else if (e->dest == e2->dest)
2374 *other_bb = e->dest;
2375 if (*other_bb == NULL)
2376 return false;
2378 if (e->dest == *other_bb)
2379 other_edge_seen = true;
2380 else if (backward)
2381 return false;
2383 if (*other_bb == NULL || !other_edge_seen)
2384 return false;
2386 else if (single_succ (bb) != *other_bb)
2387 return false;
2389 /* Now check all PHIs of *OTHER_BB. */
2390 e = find_edge (bb, *other_bb);
2391 e2 = find_edge (test_bb, *other_bb);
2392 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2394 gimple phi = gsi_stmt (gsi);
2395 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2396 corresponding to BB and TEST_BB predecessor must be the same. */
2397 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2398 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2400 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2401 one of the PHIs should have the lhs of the last stmt in
2402 that block as PHI arg and that PHI should have 0 or 1
2403 corresponding to it in all other range test basic blocks
2404 considered. */
2405 if (!is_cond)
2407 if (gimple_phi_arg_def (phi, e->dest_idx)
2408 == gimple_assign_lhs (stmt)
2409 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2410 || integer_onep (gimple_phi_arg_def (phi,
2411 e2->dest_idx))))
2412 continue;
2414 else
2416 gimple test_last = last_stmt (test_bb);
2417 if (gimple_code (test_last) != GIMPLE_COND
2418 && gimple_phi_arg_def (phi, e2->dest_idx)
2419 == gimple_assign_lhs (test_last)
2420 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2421 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2422 continue;
2425 return false;
2428 return true;
2431 /* Return true if BB doesn't have side-effects that would disallow
2432 range test optimization, all SSA_NAMEs set in the bb are consumed
2433 in the bb and there are no PHIs. */
2435 static bool
2436 no_side_effect_bb (basic_block bb)
2438 gimple_stmt_iterator gsi;
2439 gimple last;
2441 if (!gimple_seq_empty_p (phi_nodes (bb)))
2442 return false;
2443 last = last_stmt (bb);
2444 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2446 gimple stmt = gsi_stmt (gsi);
2447 tree lhs;
2448 imm_use_iterator imm_iter;
2449 use_operand_p use_p;
2451 if (is_gimple_debug (stmt))
2452 continue;
2453 if (gimple_has_side_effects (stmt))
2454 return false;
2455 if (stmt == last)
2456 return true;
2457 if (!is_gimple_assign (stmt))
2458 return false;
2459 lhs = gimple_assign_lhs (stmt);
2460 if (TREE_CODE (lhs) != SSA_NAME)
2461 return false;
2462 if (gimple_assign_rhs_could_trap_p (stmt))
2463 return false;
2464 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2466 gimple use_stmt = USE_STMT (use_p);
2467 if (is_gimple_debug (use_stmt))
2468 continue;
2469 if (gimple_bb (use_stmt) != bb)
2470 return false;
2473 return false;
2476 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2477 return true and fill in *OPS recursively. */
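/* Illustrative example (SSA names invented): if _6 is defined by
     _5 = _1 & _2;
     _6 = _5 & _3;
   then get_ops (_6, BIT_AND_EXPR, ...) recurses through _5 and pushes
   the leaves _1, _2 and _3 into *OPS, provided each leaf is single-use
   and not itself a reassociable BIT_AND_EXPR.  */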
2479 static bool
2480 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2481 struct loop *loop)
2483 gimple stmt = SSA_NAME_DEF_STMT (var);
2484 tree rhs[2];
2485 int i;
2487 if (!is_reassociable_op (stmt, code, loop))
2488 return false;
2490 rhs[0] = gimple_assign_rhs1 (stmt);
2491 rhs[1] = gimple_assign_rhs2 (stmt);
2492 gimple_set_visited (stmt, true);
2493 for (i = 0; i < 2; i++)
2494 if (TREE_CODE (rhs[i]) == SSA_NAME
2495 && !get_ops (rhs[i], code, ops, loop)
2496 && has_single_use (rhs[i]))
2498 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2500 oe->op = rhs[i];
2501 oe->rank = code;
2502 oe->id = 0;
2503 oe->count = 1;
2504 ops->safe_push (oe);
2506 return true;
2509 /* Inter-bb range test optimization. */
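/* A sketch of the shape this handles (illustrative): a chain of basic
   blocks each ending in a GIMPLE_COND such as if (a_1 == 3), if (a_1 == 5),
   ..., all branching to one common destination block; the individual
   conditions are gathered into an ops vector and passed to
   optimize_range_tests as if they formed a single range test.  */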
2511 static void
2512 maybe_optimize_range_tests (gimple stmt)
2514 basic_block first_bb = gimple_bb (stmt);
2515 basic_block last_bb = first_bb;
2516 basic_block other_bb = NULL;
2517 basic_block bb;
2518 edge_iterator ei;
2519 edge e;
2520 vec<operand_entry_t> ops = vNULL;
2522 /* Consider only basic blocks that end with GIMPLE_COND or
2523 a cast statement satisfying final_range_test_p. All
2524 but the last bb in the first_bb .. last_bb range
2525 should end with GIMPLE_COND. */
2526 if (gimple_code (stmt) == GIMPLE_COND)
2528 if (EDGE_COUNT (first_bb->succs) != 2)
2529 return;
2531 else if (final_range_test_p (stmt))
2532 other_bb = single_succ (first_bb);
2533 else
2534 return;
2536 if (stmt_could_throw_p (stmt))
2537 return;
2539 /* As relative ordering of post-dominator sons isn't fixed,
2540 maybe_optimize_range_tests can be called first on any
2541 bb in the range we want to optimize. So, start searching
2542 backwards, if first_bb can be set to a predecessor. */
2543 while (single_pred_p (first_bb))
2545 basic_block pred_bb = single_pred (first_bb);
2546 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2547 break;
2548 if (!no_side_effect_bb (first_bb))
2549 break;
2550 first_bb = pred_bb;
2552 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2553 Before starting forward search in last_bb successors, find
2554 out the other_bb. */
2555 if (first_bb == last_bb)
2557 other_bb = NULL;
2558 /* As non-GIMPLE_COND last stmt always terminates the range,
2559 if forward search didn't discover anything, just give up. */
2560 if (gimple_code (stmt) != GIMPLE_COND)
2561 return;
2562 /* Look at both successors. Either it ends with a GIMPLE_COND
2563 and satisfies suitable_cond_bb, or ends with a cast and
2564 other_bb is that cast's successor. */
2565 FOR_EACH_EDGE (e, ei, first_bb->succs)
2566 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2567 || e->dest == first_bb)
2568 return;
2569 else if (single_pred_p (e->dest))
2571 stmt = last_stmt (e->dest);
2572 if (stmt
2573 && gimple_code (stmt) == GIMPLE_COND
2574 && EDGE_COUNT (e->dest->succs) == 2)
2576 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2577 break;
2578 else
2579 other_bb = NULL;
2581 else if (stmt
2582 && final_range_test_p (stmt)
2583 && find_edge (first_bb, single_succ (e->dest)))
2585 other_bb = single_succ (e->dest);
2586 if (other_bb == first_bb)
2587 other_bb = NULL;
2590 if (other_bb == NULL)
2591 return;
2593 /* Now do the forward search, moving last_bb to successor bbs
2594 that aren't other_bb. */
2595 while (EDGE_COUNT (last_bb->succs) == 2)
2597 FOR_EACH_EDGE (e, ei, last_bb->succs)
2598 if (e->dest != other_bb)
2599 break;
2600 if (e == NULL)
2601 break;
2602 if (!single_pred_p (e->dest))
2603 break;
2604 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2605 break;
2606 if (!no_side_effect_bb (e->dest))
2607 break;
2608 last_bb = e->dest;
2610 if (first_bb == last_bb)
2611 return;
2612 /* Here basic blocks first_bb through last_bb's predecessor
2613 end with GIMPLE_COND, all of them have one of the edges to
2614 other_bb and another to another block in the range,
2615 all blocks except first_bb don't have side-effects and
2616 last_bb ends with either GIMPLE_COND, or cast satisfying
2617 final_range_test_p. */
2618 for (bb = last_bb; ; bb = single_pred (bb))
2620 enum tree_code code;
2621 tree lhs, rhs;
2623 e = find_edge (bb, other_bb);
2624 stmt = last_stmt (bb);
2625 gimple_set_visited (stmt, true);
2626 if (gimple_code (stmt) != GIMPLE_COND)
2628 use_operand_p use_p;
2629 gimple phi;
2630 edge e2;
2631 unsigned int d;
2633 lhs = gimple_assign_lhs (stmt);
2634 rhs = gimple_assign_rhs1 (stmt);
2635 gcc_assert (bb == last_bb);
2637 /* stmt is
2638 _123 = (int) _234;
2640 followed by:
2641 <bb M>:
2642 # _345 = PHI <_123(N), 1(...), 1(...)>
2644 or 0 instead of 1. If it is 0, the _234
2645 range test is anded together with all the
2646 other range tests, if it is 1, it is ored with
2647 them. */
2648 single_imm_use (lhs, &use_p, &phi);
2649 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2650 e2 = find_edge (first_bb, other_bb);
2651 d = e2->dest_idx;
2652 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2653 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2654 code = BIT_AND_EXPR;
2655 else
2657 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2658 code = BIT_IOR_EXPR;
2661 /* If _234 SSA_NAME_DEF_STMT is
2662 _234 = _567 | _789;
2663 (or &, corresponding to 1/0 in the phi arguments),
2664 push into ops the individual range test arguments
2665 of the bitwise or resp. and, recursively. */
2666 if (!get_ops (rhs, code, &ops,
2667 loop_containing_stmt (stmt))
2668 && has_single_use (rhs))
2670 /* Otherwise, push the _234 range test itself. */
2671 operand_entry_t oe
2672 = (operand_entry_t) pool_alloc (operand_entry_pool);
2674 oe->op = rhs;
2675 oe->rank = code;
2676 oe->id = 0;
2677 oe->count = 1;
2678 ops.safe_push (oe);
2680 continue;
2682 /* Otherwise stmt is GIMPLE_COND. */
2683 code = gimple_cond_code (stmt);
2684 lhs = gimple_cond_lhs (stmt);
2685 rhs = gimple_cond_rhs (stmt);
2686 if (TREE_CODE (lhs) == SSA_NAME
2687 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2688 && ((code != EQ_EXPR && code != NE_EXPR)
2689 || rhs != boolean_false_node
2690 /* Either push into ops the individual bitwise
2691 or resp. and operands, depending on which
2692 edge is other_bb. */
2693 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2694 ^ (code == EQ_EXPR))
2695 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2696 loop_containing_stmt (stmt))))
2698 /* Or push the GIMPLE_COND stmt itself. */
2699 operand_entry_t oe
2700 = (operand_entry_t) pool_alloc (operand_entry_pool);
2702 oe->op = NULL;
2703 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2704 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2705 /* oe->op = NULL signals that there is no SSA_NAME
2706 for the range test; oe->id instead is the
2707 number of the basic block at whose end the GIMPLE_COND
2708 is. */
2709 oe->id = bb->index;
2710 oe->count = 1;
2711 ops.safe_push (oe);
2713 if (bb == first_bb)
2714 break;
2716 if (ops.length () > 1)
2717 optimize_range_tests (ERROR_MARK, &ops);
2718 ops.release ();
2721 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2722 of STMT in its operands. This is also known as a "destructive
2723 update" operation. */
2725 static bool
2726 is_phi_for_stmt (gimple stmt, tree operand)
2728 gimple def_stmt;
2729 tree lhs;
2730 use_operand_p arg_p;
2731 ssa_op_iter i;
2733 if (TREE_CODE (operand) != SSA_NAME)
2734 return false;
2736 lhs = gimple_assign_lhs (stmt);
2738 def_stmt = SSA_NAME_DEF_STMT (operand);
2739 if (gimple_code (def_stmt) != GIMPLE_PHI)
2740 return false;
2742 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2743 if (lhs == USE_FROM_PTR (arg_p))
2744 return true;
2745 return false;
2748 /* Remove the def stmt of VAR if VAR has zero uses and, if so, recurse
2749 on its rhs1 operand. */
2751 static void
2752 remove_visited_stmt_chain (tree var)
2754 gimple stmt;
2755 gimple_stmt_iterator gsi;
2757 while (1)
2759 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2760 return;
2761 stmt = SSA_NAME_DEF_STMT (var);
2762 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2764 var = gimple_assign_rhs1 (stmt);
2765 gsi = gsi_for_stmt (stmt);
2766 gsi_remove (&gsi, true);
2767 release_defs (stmt);
2769 else
2770 return;
2774 /* This function checks three consecutive operands in
2775 the passed operand vector OPS starting from OPINDEX and
2776 swaps two operands if it is profitable for the binary operation
2777 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2779 We pair ops with the same rank if possible.
2781 The alternative we try is to see if STMT is a destructive
2782 update style statement, which is like:
2783 b = phi (a, ...)
2784 a = c + b;
2785 In that case, we want to use the destructive update form to
2786 expose the possible vectorizer sum reduction opportunity.
2787 In that case, the third operand will be the phi node. This
2788 check is not performed if STMT is null.
2790 We could, of course, try to be better as noted above, and do a
2791 lot of work to try to find these opportunities in >3 operand
2792 cases, but it is unlikely to be worth it. */
2794 static void
2795 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2796 unsigned int opindex, gimple stmt)
2798 operand_entry_t oe1, oe2, oe3;
2800 oe1 = ops[opindex];
2801 oe2 = ops[opindex + 1];
2802 oe3 = ops[opindex + 2];
2804 if ((oe1->rank == oe2->rank
2805 && oe2->rank != oe3->rank)
2806 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2807 && !is_phi_for_stmt (stmt, oe1->op)
2808 && !is_phi_for_stmt (stmt, oe2->op)))
2810 struct operand_entry temp = *oe3;
2811 oe3->op = oe1->op;
2812 oe3->rank = oe1->rank;
2813 oe1->op = temp.op;
2814 oe1->rank = temp.rank;
2816 else if ((oe1->rank == oe3->rank
2817 && oe2->rank != oe3->rank)
2818 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2819 && !is_phi_for_stmt (stmt, oe1->op)
2820 && !is_phi_for_stmt (stmt, oe3->op)))
2822 struct operand_entry temp = *oe2;
2823 oe2->op = oe1->op;
2824 oe2->rank = oe1->rank;
2825 oe1->op = temp.op;
2826 oe1->rank = temp.rank;
2830 /* Recursively rewrite our linearized statements so that the operators
2831 match those in OPS[OPINDEX], putting the computation in rank
2832 order. */
2834 static void
2835 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2836 vec<operand_entry_t> ops, bool moved)
2838 tree rhs1 = gimple_assign_rhs1 (stmt);
2839 tree rhs2 = gimple_assign_rhs2 (stmt);
2840 operand_entry_t oe;
2842 /* If we have three operands left, then we want to make sure the ones
2843 that get the double binary op are chosen wisely. */
2844 if (opindex + 3 == ops.length ())
2845 swap_ops_for_binary_stmt (ops, opindex, stmt);
2847 /* The final recursion case for this function is that you have
2848 exactly two operations left.
2849 If we had exactly one op in the entire list to start with, we
2850 would have never called this function, and the tail recursion
2851 rewrites them one at a time. */
2852 if (opindex + 2 == ops.length ())
2854 operand_entry_t oe1, oe2;
2856 oe1 = ops[opindex];
2857 oe2 = ops[opindex + 1];
2859 if (rhs1 != oe1->op || rhs2 != oe2->op)
2861 if (dump_file && (dump_flags & TDF_DETAILS))
2863 fprintf (dump_file, "Transforming ");
2864 print_gimple_stmt (dump_file, stmt, 0, 0);
2867 gimple_assign_set_rhs1 (stmt, oe1->op);
2868 gimple_assign_set_rhs2 (stmt, oe2->op);
2869 update_stmt (stmt);
2870 if (rhs1 != oe1->op && rhs1 != oe2->op)
2871 remove_visited_stmt_chain (rhs1);
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2875 fprintf (dump_file, " into ");
2876 print_gimple_stmt (dump_file, stmt, 0, 0);
2879 return;
2882 /* If we hit here, we should have 3 or more ops left. */
2883 gcc_assert (opindex + 2 < ops.length ());
2885 /* Rewrite the next operator. */
2886 oe = ops[opindex];
2888 if (oe->op != rhs2)
2890 if (!moved)
2892 gimple_stmt_iterator gsinow, gsirhs1;
2893 gimple stmt1 = stmt, stmt2;
2894 unsigned int count;
2896 gsinow = gsi_for_stmt (stmt);
2897 count = ops.length () - opindex - 2;
2898 while (count-- != 0)
2900 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2901 gsirhs1 = gsi_for_stmt (stmt2);
2902 gsi_move_before (&gsirhs1, &gsinow);
2903 gsi_prev (&gsinow);
2904 stmt1 = stmt2;
2906 moved = true;
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2911 fprintf (dump_file, "Transforming ");
2912 print_gimple_stmt (dump_file, stmt, 0, 0);
2915 gimple_assign_set_rhs2 (stmt, oe->op);
2916 update_stmt (stmt);
2918 if (dump_file && (dump_flags & TDF_DETAILS))
2920 fprintf (dump_file, " into ");
2921 print_gimple_stmt (dump_file, stmt, 0, 0);
2924 /* Recurse on the LHS of the binary operator, which is guaranteed to
2925 be the non-leaf side. */
2926 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2929 /* Find out how many cycles we need to compute the statement chain.
2930 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2931 maximum number of independent statements we may execute per cycle. */
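/* Worked example (illustrative): for ops_num = 10 and cpu_width = 2,
   the first phase takes 2 cycles (10 -> 8 -> 6 operands) and the
   remaining 6 operands need floor_log2 (6) + 1 = 3 further cycles,
   so the function returns 5.  */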
2933 static int
2934 get_required_cycles (int ops_num, int cpu_width)
2936 int res;
2937 int elog;
2938 unsigned int rest;
2940 /* While we have more than 2 * cpu_width operands
2941 we may reduce the number of operands by cpu_width
2942 per cycle. */
2943 res = ops_num / (2 * cpu_width);
2945 /* The remaining operand count can then be halved each cycle
2946 until we have only one operand. */
2947 rest = (unsigned)(ops_num - res * cpu_width);
2948 elog = exact_log2 (rest);
2949 if (elog >= 0)
2950 res += elog;
2951 else
2952 res += floor_log2 (rest) + 1;
2954 return res;
2957 /* Returns an optimal number of registers to use for computation of
2958 given statements. */
2960 static int
2961 get_reassociation_width (int ops_num, enum tree_code opc,
2962 enum machine_mode mode)
2964 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2965 int width;
2966 int width_min;
2967 int cycles_best;
2969 if (param_width > 0)
2970 width = param_width;
2971 else
2972 width = targetm.sched.reassociation_width (opc, mode);
2974 if (width == 1)
2975 return width;
2977 /* Get the minimal time required for sequence computation. */
2978 cycles_best = get_required_cycles (ops_num, width);
2980 /* Check whether we can use a smaller width and still compute the
2981 sequence in the same number of cycles. That allows us to reduce
2982 register usage. get_required_cycles only grows as the width shrinks,
2983 so we can perform a binary search for the minimal width that still
2984 results in the optimal cycle count. */
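/* For example (illustrative): with 4 operands and a target-provided
   width of 4, a width of 2 already achieves the minimal 2 cycles, so
   the search below settles on 2 and frees registers.  */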
2985 width_min = 1;
2986 while (width > width_min)
2988 int width_mid = (width + width_min) / 2;
2990 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2991 width = width_mid;
2992 else if (width_min < width_mid)
2993 width_min = width_mid;
2994 else
2995 break;
2998 return width;
3001 /* Recursively rewrite our linearized statements so that the operators
3002 match those in OPS[OPINDEX], putting the computation in rank
3003 order and trying to allow operations to be executed in
3004 parallel. */
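/* For instance (illustrative): with width 2, eight operands can be
   combined as a roughly balanced tree of depth three instead of a
   linear chain of seven dependent statements, so independent
   additions can issue in the same cycle.  */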
3006 static void
3007 rewrite_expr_tree_parallel (gimple stmt, int width,
3008 vec<operand_entry_t> ops)
3010 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3011 int op_num = ops.length ();
3012 int stmt_num = op_num - 1;
3013 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3014 int op_index = op_num - 1;
3015 int stmt_index = 0;
3016 int ready_stmts_end = 0;
3017 int i = 0;
3018 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3020 /* We start expression rewriting from the top statements.
3021 So, in this loop we create a full list of statements
3022 we will work with. */
3023 stmts[stmt_num - 1] = stmt;
3024 for (i = stmt_num - 2; i >= 0; i--)
3025 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3027 for (i = 0; i < stmt_num; i++)
3029 tree op1, op2;
3031 /* Determine whether we should use results of
3032 already handled statements or not. */
3033 if (ready_stmts_end == 0
3034 && (i - stmt_index >= width || op_index < 1))
3035 ready_stmts_end = i;
3037 /* Now we choose operands for the next statement. A nonzero
3038 value in ready_stmts_end means here that we should use
3039 the result of already generated statements as a new operand. */
3040 if (ready_stmts_end > 0)
3042 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3043 if (ready_stmts_end > stmt_index)
3044 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3045 else if (op_index >= 0)
3046 op2 = ops[op_index--]->op;
3047 else
3049 gcc_assert (stmt_index < i);
3050 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3053 if (stmt_index >= ready_stmts_end)
3054 ready_stmts_end = 0;
3056 else
3058 if (op_index > 1)
3059 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3060 op2 = ops[op_index--]->op;
3061 op1 = ops[op_index--]->op;
3064 /* If we emit the last statement then we should put
3065 operands into the last statement. It will also
3066 break the loop. */
3067 if (op_index < 0 && stmt_index == i)
3068 i = stmt_num - 1;
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3072 fprintf (dump_file, "Transforming ");
3073 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3076 /* We keep original statement only for the last one. All
3077 others are recreated. */
3078 if (i == stmt_num - 1)
3080 gimple_assign_set_rhs1 (stmts[i], op1);
3081 gimple_assign_set_rhs2 (stmts[i], op2);
3082 update_stmt (stmts[i]);
3084 else
3085 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3087 if (dump_file && (dump_flags & TDF_DETAILS))
3089 fprintf (dump_file, " into ");
3090 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3094 remove_visited_stmt_chain (last_rhs1);
3097 /* Transform STMT, which is really (A + B) + (C + D), into the left
3098 linear form, ((A + B) + C) + D.
3099 Recurse on D if necessary. */
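/* Illustratively (temporaries invented): from
     t1 = A + B;  t2 = C + D;  s = t1 + t2;
   the code below produces
     t1 = A + B;  t2 = t1 + D;  s = t2 + C;
   i.e. the left-linear chain ((A + B) + D) + C; the exact operand
   order within the chain does not matter here.  */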
3101 static void
3102 linearize_expr (gimple stmt)
3104 gimple_stmt_iterator gsinow, gsirhs;
3105 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3106 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3107 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3108 gimple newbinrhs = NULL;
3109 struct loop *loop = loop_containing_stmt (stmt);
3111 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3112 && is_reassociable_op (binrhs, rhscode, loop));
3114 gsinow = gsi_for_stmt (stmt);
3115 gsirhs = gsi_for_stmt (binrhs);
3116 gsi_move_before (&gsirhs, &gsinow);
3118 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3119 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3120 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3122 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3123 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3125 if (dump_file && (dump_flags & TDF_DETAILS))
3127 fprintf (dump_file, "Linearized: ");
3128 print_gimple_stmt (dump_file, stmt, 0, 0);
3131 reassociate_stats.linearized++;
3132 update_stmt (binrhs);
3133 update_stmt (binlhs);
3134 update_stmt (stmt);
3136 gimple_set_visited (stmt, true);
3137 gimple_set_visited (binlhs, true);
3138 gimple_set_visited (binrhs, true);
3140 /* Tail recurse on the new rhs if it still needs reassociation. */
3141 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3142 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3143 want to change the algorithm while converting to tuples. */
3144 linearize_expr (stmt);
3147 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3148 it. Otherwise, return NULL. */
3150 static gimple
3151 get_single_immediate_use (tree lhs)
3153 use_operand_p immuse;
3154 gimple immusestmt;
3156 if (TREE_CODE (lhs) == SSA_NAME
3157 && single_imm_use (lhs, &immuse, &immusestmt)
3158 && is_gimple_assign (immusestmt))
3159 return immusestmt;
3161 return NULL;
3164 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3165 representing the negated value. Insertions of any necessary
3166 instructions go before GSI.
3167 This function is recursive in that, if you hand it "a_5" as the
3168 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3169 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3171 static tree
3172 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3174 gimple negatedefstmt = NULL;
3175 tree resultofnegate;
3177 /* If we are trying to negate a name, defined by an add, negate the
3178 add operands instead. */
3179 if (TREE_CODE (tonegate) == SSA_NAME)
3180 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3181 if (TREE_CODE (tonegate) == SSA_NAME
3182 && is_gimple_assign (negatedefstmt)
3183 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3184 && has_single_use (gimple_assign_lhs (negatedefstmt))
3185 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3187 gimple_stmt_iterator gsi;
3188 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3189 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3191 gsi = gsi_for_stmt (negatedefstmt);
3192 rhs1 = negate_value (rhs1, &gsi);
3193 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3195 gsi = gsi_for_stmt (negatedefstmt);
3196 rhs2 = negate_value (rhs2, &gsi);
3197 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3199 update_stmt (negatedefstmt);
3200 return gimple_assign_lhs (negatedefstmt);
3203 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3204 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3205 NULL_TREE, true, GSI_SAME_STMT);
3206 return resultofnegate;
3209 /* Return true if we should break up the subtract in STMT into an add
3210 with negate. This is true when the subtract operands are really
3211 adds, or the subtract itself is used in an add expression. In
3212 either case, breaking up the subtract into an add with negate
3213 exposes the adds to reassociation. */
3215 static bool
3216 should_break_up_subtract (gimple stmt)
3218 tree lhs = gimple_assign_lhs (stmt);
3219 tree binlhs = gimple_assign_rhs1 (stmt);
3220 tree binrhs = gimple_assign_rhs2 (stmt);
3221 gimple immusestmt;
3222 struct loop *loop = loop_containing_stmt (stmt);
3224 if (TREE_CODE (binlhs) == SSA_NAME
3225 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3226 return true;
3228 if (TREE_CODE (binrhs) == SSA_NAME
3229 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3230 return true;
3232 if (TREE_CODE (lhs) == SSA_NAME
3233 && (immusestmt = get_single_immediate_use (lhs))
3234 && is_gimple_assign (immusestmt)
3235 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3236 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3237 return true;
3238 return false;
3241 /* Transform STMT from A - B into A + -B. */
3243 static void
3244 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3246 tree rhs1 = gimple_assign_rhs1 (stmt);
3247 tree rhs2 = gimple_assign_rhs2 (stmt);
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file, "Breaking up subtract ");
3252 print_gimple_stmt (dump_file, stmt, 0, 0);
3255 rhs2 = negate_value (rhs2, gsip);
3256 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3257 update_stmt (stmt);
3260 /* Determine whether STMT is a builtin call that raises an SSA name
3261 to an integer power and has only one use. If so, and this is early
3262 reassociation and unsafe math optimizations are permitted, place
3263 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3264 If any of these conditions does not hold, return FALSE. */
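/* For example (illustrative): t_1 = __builtin_pow (x_2, 4.0) yields
   *BASE = x_2 and *EXPONENT = 4, whereas __builtin_pow (x_2, 0.5) is
   rejected because 0.5 is not an exact integer, and exponents below
   two are rejected as not worth expanding.  */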
3266 static bool
3267 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3269 tree fndecl, arg1;
3270 REAL_VALUE_TYPE c, cint;
3272 if (!first_pass_instance
3273 || !flag_unsafe_math_optimizations
3274 || !is_gimple_call (stmt)
3275 || !has_single_use (gimple_call_lhs (stmt)))
3276 return false;
3278 fndecl = gimple_call_fndecl (stmt);
3280 if (!fndecl
3281 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3282 return false;
3284 switch (DECL_FUNCTION_CODE (fndecl))
3286 CASE_FLT_FN (BUILT_IN_POW):
3287 *base = gimple_call_arg (stmt, 0);
3288 arg1 = gimple_call_arg (stmt, 1);
3290 if (TREE_CODE (arg1) != REAL_CST)
3291 return false;
3293 c = TREE_REAL_CST (arg1);
3295 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3296 return false;
3298 *exponent = real_to_integer (&c);
3299 real_from_integer (&cint, VOIDmode, *exponent,
3300 *exponent < 0 ? -1 : 0, 0);
3301 if (!real_identical (&c, &cint))
3302 return false;
3304 break;
3306 CASE_FLT_FN (BUILT_IN_POWI):
3307 *base = gimple_call_arg (stmt, 0);
3308 arg1 = gimple_call_arg (stmt, 1);
3310 if (!host_integerp (arg1, 0))
3311 return false;
3313 *exponent = TREE_INT_CST_LOW (arg1);
3314 break;
3316 default:
3317 return false;
3320 /* Expanding negative exponents is generally unproductive, so we don't
3321 complicate matters with those. Exponents of zero and one should
3322 have been handled by expression folding. */
3323 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3324 return false;
3326 return true;
3329 /* Recursively linearize a binary expression that is the RHS of STMT.
3330 Place the operands of the expression tree in the vector named OPS. */
3332 static void
3333 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3334 bool is_associative, bool set_visited)
3336 tree binlhs = gimple_assign_rhs1 (stmt);
3337 tree binrhs = gimple_assign_rhs2 (stmt);
3338 gimple binlhsdef = NULL, binrhsdef = NULL;
3339 bool binlhsisreassoc = false;
3340 bool binrhsisreassoc = false;
3341 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3342 struct loop *loop = loop_containing_stmt (stmt);
3343 tree base = NULL_TREE;
3344 HOST_WIDE_INT exponent = 0;
3346 if (set_visited)
3347 gimple_set_visited (stmt, true);
3349 if (TREE_CODE (binlhs) == SSA_NAME)
3351 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3352 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3353 && !stmt_could_throw_p (binlhsdef));
3356 if (TREE_CODE (binrhs) == SSA_NAME)
3358 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3359 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3360 && !stmt_could_throw_p (binrhsdef));
3363 /* If the LHS is not reassociable, but the RHS is, we need to swap
3364 them. If neither is reassociable, there is nothing we can do, so
3365 just put them in the ops vector. If the LHS is reassociable,
3366 linearize it. If both are reassociable, then linearize the RHS
3367 and the LHS. */
3369 if (!binlhsisreassoc)
3371 tree temp;
3373 /* If this is not an associative operation, like division, give up. */
3374 if (!is_associative)
3376 add_to_ops_vec (ops, binrhs);
3377 return;
3380 if (!binrhsisreassoc)
3382 if (rhscode == MULT_EXPR
3383 && TREE_CODE (binrhs) == SSA_NAME
3384 && acceptable_pow_call (binrhsdef, &base, &exponent))
3386 add_repeat_to_ops_vec (ops, base, exponent);
3387 gimple_set_visited (binrhsdef, true);
3389 else
3390 add_to_ops_vec (ops, binrhs);
3392 if (rhscode == MULT_EXPR
3393 && TREE_CODE (binlhs) == SSA_NAME
3394 && acceptable_pow_call (binlhsdef, &base, &exponent))
3396 add_repeat_to_ops_vec (ops, base, exponent);
3397 gimple_set_visited (binlhsdef, true);
3399 else
3400 add_to_ops_vec (ops, binlhs);
3402 return;
3405 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, "swapping operands of ");
3408 print_gimple_stmt (dump_file, stmt, 0, 0);
3411 swap_tree_operands (stmt,
3412 gimple_assign_rhs1_ptr (stmt),
3413 gimple_assign_rhs2_ptr (stmt));
3414 update_stmt (stmt);
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3418 fprintf (dump_file, " is now ");
3419 print_gimple_stmt (dump_file, stmt, 0, 0);
3422 /* We want to make it so the lhs is always the reassociative op,
3423 so swap. */
3424 temp = binlhs;
3425 binlhs = binrhs;
3426 binrhs = temp;
3428 else if (binrhsisreassoc)
3430 linearize_expr (stmt);
3431 binlhs = gimple_assign_rhs1 (stmt);
3432 binrhs = gimple_assign_rhs2 (stmt);
3435 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3436 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3437 rhscode, loop));
3438 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3439 is_associative, set_visited);
3441 if (rhscode == MULT_EXPR
3442 && TREE_CODE (binrhs) == SSA_NAME
3443 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3445 add_repeat_to_ops_vec (ops, base, exponent);
3446 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3448 else
3449 add_to_ops_vec (ops, binrhs);
3452 /* Repropagate the negates back into subtracts, since no other pass
3453 currently does it. */
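/* For example (illustrative): given x_1 = -a_2; y_3 = b_4 + x_1; the
   PLUS_EXPR case below rewrites the use into y_3 = b_4 - a_2, folding
   the negation away.  */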
3455 static void
3456 repropagate_negates (void)
3458 unsigned int i = 0;
3459 tree negate;
3461 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3463 gimple user = get_single_immediate_use (negate);
3465 if (!user || !is_gimple_assign (user))
3466 continue;
3468 /* The negate operand can be either operand of a PLUS_EXPR
3469 (it can be the LHS if the RHS is a constant for example).
3471 Force the negate operand to the RHS of the PLUS_EXPR, then
3472 transform the PLUS_EXPR into a MINUS_EXPR. */
3473 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3475 /* If the negated operand appears on the LHS of the
3476 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3477 to force the negated operand to the RHS of the PLUS_EXPR. */
3478 if (gimple_assign_rhs1 (user) == negate)
3480 swap_tree_operands (user,
3481 gimple_assign_rhs1_ptr (user),
3482 gimple_assign_rhs2_ptr (user));
3485 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3486 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3487 if (gimple_assign_rhs2 (user) == negate)
3489 tree rhs1 = gimple_assign_rhs1 (user);
3490 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3491 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3492 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3493 update_stmt (user);
3496 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3498 if (gimple_assign_rhs1 (user) == negate)
3500 /* We have
3501 x = -a
3502 y = x - b
3503 which we transform into
3504 x = a + b
3505 y = -x .
3506 This pushes down the negate which we possibly can merge
3507 into some other operation, hence insert it into the
3508 plus_negates vector. */
3509 gimple feed = SSA_NAME_DEF_STMT (negate);
3510 tree a = gimple_assign_rhs1 (feed);
3511 tree rhs2 = gimple_assign_rhs2 (user);
3512 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3513 gimple_replace_lhs (feed, negate);
3514 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3515 update_stmt (gsi_stmt (gsi));
3516 gsi2 = gsi_for_stmt (user);
3517 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3518 update_stmt (gsi_stmt (gsi2));
3519 gsi_move_before (&gsi, &gsi2);
3520 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3522 else
3524 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3525 rid of one operation. */
3526 gimple feed = SSA_NAME_DEF_STMT (negate);
3527 tree a = gimple_assign_rhs1 (feed);
3528 tree rhs1 = gimple_assign_rhs1 (user);
3529 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3530 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3531 update_stmt (gsi_stmt (gsi));
3537 /* Returns true if OP is of a type for which we can do reassociation.
3538 That is for integral or non-saturating fixed-point types, and for
3539 floating-point types when associative-math is enabled. */
3541 static bool
3542 can_reassociate_p (tree op)
3544 tree type = TREE_TYPE (op);
3545 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3546 || NON_SAT_FIXED_POINT_TYPE_P (type)
3547 || (flag_associative_math && FLOAT_TYPE_P (type)))
3548 return true;
3549 return false;
3552 /* Break up subtract operations in block BB.
3554 We do this top down because we don't know whether the subtract is
3555 part of a possible chain of reassociation except at the top.
3557 IE given
3558 d = f + g
3559 c = a + e
3560 b = c - d
3561 q = b - r
3562 k = t - q
3564 we want to break up k = t - q, but we won't until we've transformed q
3565 = b - r, which won't be broken up until we transform b = c - d.
3567 En passant, clear the GIMPLE visited flag on every statement. */
3569 static void
3570 break_up_subtract_bb (basic_block bb)
3572 gimple_stmt_iterator gsi;
3573 basic_block son;
3575 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3577 gimple stmt = gsi_stmt (gsi);
3578 gimple_set_visited (stmt, false);
3580 if (!is_gimple_assign (stmt)
3581 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3582 continue;
3584 /* Look for simple gimple subtract operations. */
3585 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3587 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3588 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3589 continue;
3591 /* Check for a subtract used only in an addition. If this
3592 is the case, transform it into add of a negate for better
3593 reassociation. IE transform C = A-B into C = A + -B if C
3594 is only used in an addition. */
3595 if (should_break_up_subtract (stmt))
3596 break_up_subtract (stmt, &gsi);
3598 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3599 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3600 plus_negates.safe_push (gimple_assign_lhs (stmt));
3602 for (son = first_dom_son (CDI_DOMINATORS, bb);
3603 son;
3604 son = next_dom_son (CDI_DOMINATORS, son))
3605 break_up_subtract_bb (son);
3608 /* Used for repeated factor analysis. */
3609 struct repeat_factor_d
3611 /* An SSA name that occurs in a multiply chain. */
3612 tree factor;
3614 /* Cached rank of the factor. */
3615 unsigned rank;
3617 /* Number of occurrences of the factor in the chain. */
3618 HOST_WIDE_INT count;
3620 /* An SSA name representing the product of this factor and
3621 all factors appearing later in the repeated factor vector. */
3622 tree repr;
3625 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3626 typedef const struct repeat_factor_d *const_repeat_factor_t;
3629 static vec<repeat_factor> repeat_factor_vec;
3631 /* Used for sorting the repeat factor vector. Sort primarily by
3632 ascending occurrence count, secondarily by descending rank. */
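/* Illustrative example: factors with (count, rank) pairs (1, 5), (3, 2)
   and (3, 7) sort as (1, 5), (3, 7), (3, 2): lower counts come first,
   and within equal counts the higher-ranked factor comes first.  */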
3634 static int
3635 compare_repeat_factors (const void *x1, const void *x2)
3637 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3638 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3640 if (rf1->count != rf2->count)
3641 return rf1->count - rf2->count;
3643 return rf2->rank - rf1->rank;
3646 /* Look for repeated operands in OPS in the multiply tree rooted at
3647 STMT. Replace them with an optimal sequence of multiplies and powi
3648 builtin calls, and remove the used operands from OPS. Return an
3649 SSA name representing the value of the replacement sequence. */
3651 static tree
3652 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3654 unsigned i, j, vec_len;
3655 int ii;
3656 operand_entry_t oe;
3657 repeat_factor_t rf1, rf2;
3658 repeat_factor rfnew;
3659 tree result = NULL_TREE;
3660 tree target_ssa, iter_result;
3661 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3662 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3663 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3664 gimple mul_stmt, pow_stmt;
3666 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3667 target. */
3668 if (!powi_fndecl)
3669 return NULL_TREE;
3671 /* Allocate the repeated factor vector. */
3672 repeat_factor_vec.create (10);
3674 /* Scan the OPS vector for all SSA names in the product and build
3675 up a vector of occurrence counts for each factor. */
3676 FOR_EACH_VEC_ELT (*ops, i, oe)
3678 if (TREE_CODE (oe->op) == SSA_NAME)
3680 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3682 if (rf1->factor == oe->op)
3684 rf1->count += oe->count;
3685 break;
3689 if (j >= repeat_factor_vec.length ())
3691 rfnew.factor = oe->op;
3692 rfnew.rank = oe->rank;
3693 rfnew.count = oe->count;
3694 rfnew.repr = NULL_TREE;
3695 repeat_factor_vec.safe_push (rfnew);
3700 /* Sort the repeated factor vector by (a) increasing occurrence count,
3701 and (b) decreasing rank. */
3702 repeat_factor_vec.qsort (compare_repeat_factors);
3704 /* It is generally best to combine as many base factors as possible
3705 into a product before applying __builtin_powi to the result.
3706 However, the sort order chosen for the repeated factor vector
3707 allows us to cache partial results for the product of the base
3708 factors for subsequent use. When we already have a cached partial
3709 result from a previous iteration, it is best to make use of it
3710 before looking for another __builtin_pow opportunity.
3712 As an example, consider x * x * y * y * y * z * z * z * z.
3713 We want to first compose the product x * y * z, raise it to the
3714 second power, then multiply this by y * z, and finally multiply
3715 by z. This can be done in 5 multiplies provided we cache y * z
3716 for use in both expressions:
3718 t1 = y * z
3719 t2 = t1 * x
3720 t3 = t2 * t2
3721 t4 = t1 * t3
3722 result = t4 * z
3724 If we instead ignored the cached y * z and first multiplied by
3725 the __builtin_pow opportunity z * z, we would get the inferior:
3727 t1 = y * z
3728 t2 = t1 * x
3729 t3 = t2 * t2
3730 t4 = z * z
3731 t5 = t3 * t4
3732 result = t5 * y */
3734 vec_len = repeat_factor_vec.length ();
3736 /* Repeatedly look for opportunities to create a builtin_powi call. */
3737 while (true)
3739 HOST_WIDE_INT power;
3741 /* First look for the largest cached product of factors from
3742 preceding iterations. If found, create a builtin_powi for
3743 it if the minimum occurrence count for its factors is at
3744 least 2, or just use this cached product as our next
3745 multiplicand if the minimum occurrence count is 1. */
3746 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3748 if (rf1->repr && rf1->count > 0)
3749 break;
3752 if (j < vec_len)
3754 power = rf1->count;
3756 if (power == 1)
3758 iter_result = rf1->repr;
3760 if (dump_file && (dump_flags & TDF_DETAILS))
3762 unsigned elt;
3763 repeat_factor_t rf;
3764 fputs ("Multiplying by cached product ", dump_file);
3765 for (elt = j; elt < vec_len; elt++)
3767 rf = &repeat_factor_vec[elt];
3768 print_generic_expr (dump_file, rf->factor, 0);
3769 if (elt < vec_len - 1)
3770 fputs (" * ", dump_file);
3772 fputs ("\n", dump_file);
3775 else
3777 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3778 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3779 build_int_cst (integer_type_node,
3780 power));
3781 gimple_call_set_lhs (pow_stmt, iter_result);
3782 gimple_set_location (pow_stmt, gimple_location (stmt));
3783 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3785 if (dump_file && (dump_flags & TDF_DETAILS))
3787 unsigned elt;
3788 repeat_factor_t rf;
3789 fputs ("Building __builtin_pow call for cached product (",
3790 dump_file);
3791 for (elt = j; elt < vec_len; elt++)
3793 rf = &repeat_factor_vec[elt];
3794 print_generic_expr (dump_file, rf->factor, 0);
3795 if (elt < vec_len - 1)
3796 fputs (" * ", dump_file);
3798 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3799 power);
3803 else
3805 /* Otherwise, find the first factor in the repeated factor
3806 vector whose occurrence count is at least 2. If no such
3807 factor exists, there are no builtin_powi opportunities
3808 remaining. */
3809 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3811 if (rf1->count >= 2)
3812 break;
3815 if (j >= vec_len)
3816 break;
3818 power = rf1->count;
3820 if (dump_file && (dump_flags & TDF_DETAILS))
3822 unsigned elt;
3823 repeat_factor_t rf;
3824 fputs ("Building __builtin_pow call for (", dump_file);
3825 for (elt = j; elt < vec_len; elt++)
3827 rf = &repeat_factor_vec[elt];
3828 print_generic_expr (dump_file, rf->factor, 0);
3829 if (elt < vec_len - 1)
3830 fputs (" * ", dump_file);
3832 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3835 reassociate_stats.pows_created++;
3837 /* Visit each element of the vector in reverse order (so that
3838 high-occurrence elements are visited first, and within the
3839 same occurrence count, lower-ranked elements are visited
3840 first). Form a linear product of all elements in this order
3841 whose occurrence count is at least that of element J.
3842 Record the SSA name representing the product of each element
3843 with all subsequent elements in the vector. */
3844 if (j == vec_len - 1)
3845 rf1->repr = rf1->factor;
3846 else
3848 for (ii = vec_len - 2; ii >= (int)j; ii--)
3850 tree op1, op2;
3852 rf1 = &repeat_factor_vec[ii];
3853 rf2 = &repeat_factor_vec[ii + 1];
3855 /* Init the last factor's representative to be itself. */
3856 if (!rf2->repr)
3857 rf2->repr = rf2->factor;
3859 op1 = rf1->factor;
3860 op2 = rf2->repr;
3862 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3863 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3864 target_ssa,
3865 op1, op2);
3866 gimple_set_location (mul_stmt, gimple_location (stmt));
3867 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3868 rf1->repr = target_ssa;
3870 /* Don't reprocess the multiply we just introduced. */
3871 gimple_set_visited (mul_stmt, true);
3875 /* Form a call to __builtin_powi for the maximum product
3876 just formed, raised to the power obtained earlier. */
3877 rf1 = &repeat_factor_vec[j];
3878 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3879 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3880 build_int_cst (integer_type_node,
3881 power));
3882 gimple_call_set_lhs (pow_stmt, iter_result);
3883 gimple_set_location (pow_stmt, gimple_location (stmt));
3884 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3887 /* If we previously formed at least one other builtin_powi call,
3888 form the product of this one and those others. */
3889 if (result)
3891 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3892 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3893 result, iter_result);
3894 gimple_set_location (mul_stmt, gimple_location (stmt));
3895 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3896 gimple_set_visited (mul_stmt, true);
3897 result = new_result;
3899 else
3900 result = iter_result;
3902 /* Decrement the occurrence count of each element in the product
3903 by the count found above, and remove this many copies of each
3904 factor from OPS. */
3905 for (i = j; i < vec_len; i++)
3907 unsigned k = power;
3908 unsigned n;
3910 rf1 = &repeat_factor_vec[i];
3911 rf1->count -= power;
3913 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3915 if (oe->op == rf1->factor)
3917 if (oe->count <= k)
3919 ops->ordered_remove (n);
3920 k -= oe->count;
3922 if (k == 0)
3923 break;
3925 else
3927 oe->count -= k;
3928 break;
3935 /* At this point all elements in the repeated factor vector have a
3936 remaining occurrence count of 0 or 1, and those with a count of 1
3937 don't have cached representatives. Re-sort the ops vector and
3938 clean up. */
3939 ops->qsort (sort_by_operand_rank);
3940 repeat_factor_vec.release ();
3942 /* Return the final product computed herein. Note that there may
3943 still be some elements with single occurrence count left in OPS;
3944 those will be handled by the normal reassociation logic. */
3945 return result;
3948 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3950 static void
3951 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3953 tree rhs1;
3955 if (dump_file && (dump_flags & TDF_DETAILS))
3957 fprintf (dump_file, "Transforming ");
3958 print_gimple_stmt (dump_file, stmt, 0, 0);
3961 rhs1 = gimple_assign_rhs1 (stmt);
3962 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3963 update_stmt (stmt);
3964 remove_visited_stmt_chain (rhs1);
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, " into ");
3969 print_gimple_stmt (dump_file, stmt, 0, 0);
3973 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3975 static void
3976 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3977 tree rhs1, tree rhs2)
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3981 fprintf (dump_file, "Transforming ");
3982 print_gimple_stmt (dump_file, stmt, 0, 0);
3985 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3986 update_stmt (gsi_stmt (*gsi));
3987 remove_visited_stmt_chain (rhs1);
3989 if (dump_file && (dump_flags & TDF_DETAILS))
3991 fprintf (dump_file, " into ");
3992 print_gimple_stmt (dump_file, stmt, 0, 0);
3996 /* Reassociate expressions in basic block BB and its post-dominator as
3997 children. */
3999 static void
4000 reassociate_bb (basic_block bb)
4002 gimple_stmt_iterator gsi;
4003 basic_block son;
4004 gimple stmt = last_stmt (bb);
4006 if (stmt && !gimple_visited_p (stmt))
4007 maybe_optimize_range_tests (stmt);
4009 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4011 stmt = gsi_stmt (gsi);
4013 if (is_gimple_assign (stmt)
4014 && !stmt_could_throw_p (stmt))
4016 tree lhs, rhs1, rhs2;
4017 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4019 /* If this is not a gimple binary expression, there is
4020 nothing for us to do with it. */
4021 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4022 continue;
4024 /* If this was part of an already processed statement,
4025 we don't need to touch it again. */
4026 if (gimple_visited_p (stmt))
4028 /* This statement might have become dead because of previous
4029 reassociations. */
4030 if (has_zero_uses (gimple_get_lhs (stmt)))
4032 gsi_remove (&gsi, true);
4033 release_defs (stmt);
4034 /* We might have removed the last statement above,
4035 which leaves the iterator at the end of the sequence.
4036 In that case reset it to the last statement of the
4037 block; if the block has become empty that is still
4038 the end of the sequence, and then we need to bail
4039 out. */
4040 if (gsi_end_p (gsi))
4042 gsi = gsi_last_bb (bb);
4043 if (gsi_end_p (gsi))
4044 break;
4047 continue;
4050 lhs = gimple_assign_lhs (stmt);
4051 rhs1 = gimple_assign_rhs1 (stmt);
4052 rhs2 = gimple_assign_rhs2 (stmt);
4054 /* For operations other than the bitwise and min/max codes we
4055 cannot reassociate every type. Verify that here. */
4056 if (rhs_code != BIT_IOR_EXPR
4057 && rhs_code != BIT_AND_EXPR
4058 && rhs_code != BIT_XOR_EXPR
4059 && rhs_code != MIN_EXPR
4060 && rhs_code != MAX_EXPR
4061 && (!can_reassociate_p (lhs)
4062 || !can_reassociate_p (rhs1)
4063 || !can_reassociate_p (rhs2)))
4064 continue;
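/* The bitwise and min/max codes above are associative for any type they
   apply to; for the remaining codes can_reassociate_p must agree that
   reordering is safe -- e.g. floating-point addition is presumably only
   accepted under -fassociative-math, and signed integer arithmetic only
   when overflow is defined to wrap.  */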
4066 if (associative_tree_code (rhs_code))
4068 vec<operand_entry_t> ops = vNULL;
4069 tree powi_result = NULL_TREE;
4071 /* There may be no immediate uses left by the time we
4072 get here because we may have eliminated them all. */
4073 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4074 continue;
4076 gimple_set_visited (stmt, true);
4077 linearize_expr_tree (&ops, stmt, true, true);
4078 ops.qsort (sort_by_operand_rank);
4079 optimize_ops_list (rhs_code, &ops);
4080 if (undistribute_ops_list (rhs_code, &ops,
4081 loop_containing_stmt (stmt)))
4083 ops.qsort (sort_by_operand_rank);
4084 optimize_ops_list (rhs_code, &ops);
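/* Illustrative example: undistribution can rewrite a_1*b_2 + a_1*c_3
   within OPS as a_1 * (b_2 + c_3); since that introduces a fresh
   operand, the list is sorted and optimized once more.  */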
4087 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4088 optimize_range_tests (rhs_code, &ops);
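/* Illustrative example (assumed, not checked against every case the
   helper handles): for a BIT_AND_EXPR chain such as a_1 != 0 & a_1 != 1
   the range tests can be merged into a single unsigned comparison of
   the form a_1 > 1.  */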
4090 if (first_pass_instance
4091 && rhs_code == MULT_EXPR
4092 && flag_unsafe_math_optimizations)
4093 powi_result = attempt_builtin_powi (stmt, &ops);
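/* When the powi attempt succeeds, POWI_RESULT names the factored
   product and OPS keeps only the operands it did not consume; the
   three cases below turn STMT into a plain copy of POWI_RESULT, into
   a multiply of the one leftover operand by POWI_RESULT, or rewrite
   the remaining operands and append one extra multiply.  */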
4095 /* If the operand vector is now empty, all operands were
4096 consumed by the __builtin_powi optimization. */
4097 if (ops.length () == 0)
4098 transform_stmt_to_copy (&gsi, stmt, powi_result);
4099 else if (ops.length () == 1)
4101 tree last_op = ops.last ()->op;
4103 if (powi_result)
4104 transform_stmt_to_multiply (&gsi, stmt, last_op,
4105 powi_result);
4106 else
4107 transform_stmt_to_copy (&gsi, stmt, last_op);
4109 else
4111 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4112 int ops_num = ops.length ();
4113 int width = get_reassociation_width (ops_num, rhs_code, mode);
4115 if (dump_file && (dump_flags & TDF_DETAILS))
4116 fprintf (dump_file,
4117 "Width = %d was chosen for reassociation\n", width);
4119 if (width > 1
4120 && ops.length () > 3)
4121 rewrite_expr_tree_parallel (stmt, width, ops);
4122 else
4123 rewrite_expr_tree (stmt, 0, ops, false);
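/* Illustrative example: with a width of 2, a + b + c + d is emitted as
   (a + b) + (c + d) rather than ((a + b) + c) + d, exposing two
   independent additions; the width is presumably derived from the
   target's reassociation-width scheduling hook by
   get_reassociation_width.  */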
4125 /* If we combined some repeated factors into a
4126 __builtin_powi call, multiply that result by the
4127 reassociated operands. */
4128 if (powi_result)
4130 gimple mul_stmt;
4131 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4132 tree target_ssa = make_temp_ssa_name (type, NULL,
4133 "reassocpow");
4134 gimple_set_lhs (stmt, target_ssa);
4135 update_stmt (stmt);
4136 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4137 powi_result,
4138 target_ssa);
4139 gimple_set_location (mul_stmt, gimple_location (stmt));
4140 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4144 ops.release ();
4148 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4149 son;
4150 son = next_dom_son (CDI_POST_DOMINATORS, son))
4151 reassociate_bb (son);
4154 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4155 void debug_ops_vector (vec<operand_entry_t> ops);
4157 /* Dump the operand entry vector OPS to FILE. */
4159 void
4160 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4162 operand_entry_t oe;
4163 unsigned int i;
4165 FOR_EACH_VEC_ELT (ops, i, oe)
4167 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4168 print_generic_expr (file, oe->op, 0);
4172 /* Dump the operand entry vector OPS to STDERR. */
4174 DEBUG_FUNCTION void
4175 debug_ops_vector (vec<operand_entry_t> ops)
4177 dump_ops_vector (stderr, ops);
4180 static void
4181 do_reassoc (void)
4183 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4184 reassociate_bb (EXIT_BLOCK_PTR);
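/* break_up_subtract_bb recurses over the dominator tree from the entry
   block, turning subtractions into addition of a negate where that
   helps reassociation, while reassociate_bb recurses over the
   post-dominator tree from the exit block, so each block is handled
   before the blocks it post-dominates.  */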
4187 /* Initialize the reassociation pass. */
4189 static void
4190 init_reassoc (void)
4192 int i;
4193 long rank = 2;
4194 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4196 /* Find the loops, so that we can prevent moving calculations in
4197 them. */
4198 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4200 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4202 operand_entry_pool = create_alloc_pool ("operand entry pool",
4203 sizeof (struct operand_entry), 30);
4204 next_operand_entry_id = 0;
4206 /* Reverse post order (RPO) gives an ordering in which blocks in
4207 deeper loops come later, so they receive higher ranks below. */
4208 pre_and_rev_post_order_compute (NULL, bbs, false);
4209 bb_rank = XCNEWVEC (long, last_basic_block);
4210 operand_rank = pointer_map_create ();
4212 /* Give each default definition a distinct rank. This includes
4213 parameters and the static chain. Walk backwards over all
4214 SSA names so that we get proper rank ordering according
4215 to tree_swap_operands_p. */
4216 for (i = num_ssa_names - 1; i > 0; --i)
4218 tree name = ssa_name (i);
4219 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4220 insert_operand_rank (name, ++rank);
4223 /* Set up rank for each BB */
4224 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4225 bb_rank[bbs[i]] = ++rank << 16;
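/* Keeping the per-block base rank in the high 16 bits leaves room for
   the small increments handed out to individual SSA names within a
   block, so an operand's rank can never overtake the base rank of a
   block that comes later in the ordering computed above.  */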
4227 free (bbs);
4228 calculate_dominance_info (CDI_POST_DOMINATORS);
4229 plus_negates = vNULL;
4232 /* Cleanup after the reassociation pass, and print stats if
4233 requested. */
4235 static void
4236 fini_reassoc (void)
4238 statistics_counter_event (cfun, "Linearized",
4239 reassociate_stats.linearized);
4240 statistics_counter_event (cfun, "Constants eliminated",
4241 reassociate_stats.constants_eliminated);
4242 statistics_counter_event (cfun, "Ops eliminated",
4243 reassociate_stats.ops_eliminated);
4244 statistics_counter_event (cfun, "Statements rewritten",
4245 reassociate_stats.rewritten);
4246 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4247 reassociate_stats.pows_encountered);
4248 statistics_counter_event (cfun, "Built-in powi calls created",
4249 reassociate_stats.pows_created);
4251 pointer_map_destroy (operand_rank);
4252 free_alloc_pool (operand_entry_pool);
4253 free (bb_rank);
4254 plus_negates.release ();
4255 free_dominance_info (CDI_POST_DOMINATORS);
4256 loop_optimizer_finalize ();
4259 /* Gate and execute functions for Reassociation. */
4261 static unsigned int
4262 execute_reassoc (void)
4264 init_reassoc ();
4266 do_reassoc ();
4267 repropagate_negates ();
4269 fini_reassoc ();
4270 return 0;
4273 static bool
4274 gate_tree_ssa_reassoc (void)
4276 return flag_tree_reassoc != 0;
4279 struct gimple_opt_pass pass_reassoc =
4282 GIMPLE_PASS,
4283 "reassoc", /* name */
4284 OPTGROUP_NONE, /* optinfo_flags */
4285 gate_tree_ssa_reassoc, /* gate */
4286 execute_reassoc, /* execute */
4287 NULL, /* sub */
4288 NULL, /* next */
4289 0, /* static_pass_number */
4290 TV_TREE_REASSOC, /* tv_id */
4291 PROP_cfg | PROP_ssa, /* properties_required */
4292 0, /* properties_provided */
4293 0, /* properties_destroyed */
4294 0, /* todo_flags_start */
4295 TODO_verify_ssa
4296 | TODO_verify_flow
4297 | TODO_ggc_collect /* todo_flags_finish */