/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
#include "basic-block.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "gimple-pretty-print.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"

#ifndef HAVE_conditional_move
#define HAVE_conditional_move (0)
#endif
static unsigned int tree_ssa_phiopt (void);
static unsigned int tree_ssa_phiopt_worker (bool, bool);
static bool conditional_replacement (basic_block, basic_block,
                                     edge, edge, gimple, tree, tree);
static int value_replacement (basic_block, basic_block,
                              edge, edge, gimple, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
                                edge, edge, gimple, tree, tree);
static bool abs_replacement (basic_block, basic_block,
                             edge, edge, gimple, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
                                    struct pointer_set_t *);
static bool cond_if_else_store_replacement (basic_block, basic_block,
                                            basic_block);
static struct pointer_set_t * get_non_trapping (void);
static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
                                  basic_block, basic_block);
static bool gate_hoist_loads (void);
/* This pass tries to replace an if-then-else block with an
   assignment.  We have four kinds of transformations.  Some of these
   transformations are also performed by the ifcvt RTL optimizer.

   Conditional Replacement
   -----------------------

   This transformation, implemented in conditional_replacement,
   replaces

     bb0:
      if (cond) goto bb2; else goto bb1;
     bb1:
     bb2:
      x = PHI <0 (bb1), 1 (bb0), ...>;

   with

     bb0:
      x' = cond;
      goto bb2;
     bb2:
      x = PHI <x' (bb0), ...>;

   We remove bb1 as it becomes unreachable.  This occurs often due to
   gimplification of conditionals.

   Value Replacement
   -----------------

   This transformation, implemented in value_replacement, replaces

     bb0:
       if (a != b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <a (bb1), b (bb0), ...>;

   with

     bb0:
     bb2:
       x = PHI <b (bb0), ...>;

   This opportunity can sometimes occur as a result of other
   optimizations.

   ABS Replacement
   ---------------

   This transformation, implemented in abs_replacement, replaces

     bb0:
       if (a >= 0) goto bb2; else goto bb1;
     bb1:
       x = -a;
     bb2:
       x = PHI <x (bb1), a (bb0), ...>;

   with

     bb0:
       x' = ABS_EXPR< a >;
     bb2:
       x = PHI <x' (bb0), ...>;

   MIN/MAX Replacement
   -------------------

   This transformation, implemented in minmax_replacement, replaces

     bb0:
       if (a <= b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <b (bb1), a (bb0), ...>;

   with

     bb0:
       x' = MIN_EXPR (a, b)
     bb2:
       x = PHI <x' (bb0), ...>;

   A similar transformation is done for MAX_EXPR.

   This pass also performs a fifth transformation of a slightly different
   flavor.

   Adjacent Load Hoisting
   ----------------------

   This transformation replaces

     bb0:
       if (...) goto bb2; else goto bb1;
     bb1:
       x1 = (<expr>).field1;
       goto bb3;
     bb2:
       x2 = (<expr>).field2;
     bb3:
       # x = PHI <x1, x2>;

   with

     bb0:
       x1 = (<expr>).field1;
       x2 = (<expr>).field2;
       if (...) goto bb2; else goto bb1;
     bb1:
       goto bb3;
     bb2:
     bb3:
       # x = PHI <x1, x2>;

   The purpose of this transformation is to enable generation of conditional
   move instructions such as Intel CMOVE or PowerPC ISEL.  Because one of
   the loads is speculative, the transformation is restricted to very
   specific cases to avoid introducing a page fault.  We are looking for
   the common idiom:

     if (...)
       x = y->left;
     else
       x = y->right;

   where left and right are typically adjacent pointers in a tree structure.  */
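
/* As a purely illustrative (hypothetical) source-level example of the
   first and fourth transformations above: given

     x = c ? 1 : 0;        // conditional replacement
     y = (a <= b) ? a : b; // min/max replacement

   the pass produces straightline GIMPLE equivalent to

     x = (int) c;
     y = MIN_EXPR <a, b>;

   leaving no branch for either computation.  */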
static unsigned int
tree_ssa_phiopt (void)
{
  return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
}
/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.  It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */
static unsigned int
tree_ssa_cs_elim (void)
{
  return tree_ssa_phiopt_worker (true, false);
}
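
/* Illustrative (hypothetical) example of the first pattern above: for

     *p = 0;
     if (cond)
       *p = v;

   the dominating store makes the conditional access to *p provably
   non-trapping, so the conditional store can be made unconditional:

     *p = 0;
     condtmp' = *p;
     condtmp = PHI <v, condtmp'>
     *p = condtmp;  */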
/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1.  */

static gimple
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gimple phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return gsi_stmt (gsi_start (seq));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gimple p = gsi_stmt (i);
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
                                       gimple_phi_arg_def (p, e1->dest_idx)))
        continue;

      /* If we already found a PHI whose two edge arguments differ, then
         there is no singleton for these edges; return NULL.  */
      if (phi)
        return NULL;

      phi = p;
    }
  return phi;
}
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */

static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  struct pointer_set_t *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = blocks_in_phiopt_order ();
  n = n_basic_blocks - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt, phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
        continue;

      /* If bb1 has no successors, bb2 is missing, or bb2 has no
         successors, there is nothing to do.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
          || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
          basic_block bb_tmp = bb1;
          edge e_tmp = e1;
          bb1 = bb2;
          bb2 = bb_tmp;
          e1 = e2;
          e2 = e_tmp;
        }
      else if (do_store_elim
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!single_succ_p (bb1)
              || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
              || !single_succ_p (bb2)
              || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
              || EDGE_COUNT (bb3->preds) != 2)
            continue;
          if (cond_if_else_store_replacement (bb1, bb2, bb3))
            cfgchanged = true;
          continue;
        }
      else if (do_hoist_loads
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
              && single_succ_p (bb1)
              && single_succ_p (bb2)
              && single_pred_p (bb1)
              && single_pred_p (bb2)
              && EDGE_COUNT (bb->succs) == 2
              && EDGE_COUNT (bb3->preds) == 2
              /* If one edge or the other is dominant, a conditional move
                 is likely to perform worse than the well-predicted branch.  */
              && !predictable_edge_p (EDGE_SUCC (bb, 0))
              && !predictable_edge_p (EDGE_SUCC (bb, 1)))
            hoist_adjacent_loads (bb, bb1, bb2, bb3);
          continue;
        }
      else
        continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
          || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 only has one predecessor and that it
         is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
        continue;

      if (do_store_elim)
        {
          /* bb1 is the middle block, bb2 the join block, bb the split block,
             e1 the fallthrough edge from bb1 to bb2.  We can't do the
             optimization if the join block has more than two predecessors.  */
          if (EDGE_COUNT (bb2->preds) > 2)
            continue;
          if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
            cfgchanged = true;
        }
      else
        {
          gimple_seq phis = phi_nodes (bb2);
          gimple_stmt_iterator gsi;
          bool candorest = true;

          /* Value replacement can work with more than one PHI
             so try that first.  */
          for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              phi = gsi_stmt (gsi);
              arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
              arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
              if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
                {
                  candorest = false;
                  cfgchanged = true;
                  break;
                }
            }

          if (!candorest)
            continue;

          phi = single_non_singleton_phi_for_edges (phis, e1, e2);
          if (!phi)
            continue;

          arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
          arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

          /* Something is wrong if we cannot find the arguments in the PHI
             node.  */
          gcc_assert (arg0 != NULL && arg1 != NULL);

          /* Do the replacement of conditional if it can be done.  */
          if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
        }
    }

  free (bb_order);

  if (do_store_elim)
    pointer_set_destroy (nontrap);
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
/* Returns the list of basic blocks in the function in an order that guarantees
   that if a block X has just a single predecessor Y, then Y is after X in the
   ordering.  */

basic_block *
blocks_in_phiopt_order (void)
{
  basic_block x, y;
  basic_block *order = XNEWVEC (basic_block, n_basic_blocks);
  unsigned n = n_basic_blocks - NUM_FIXED_BLOCKS;
  unsigned np, i;
  sbitmap visited = sbitmap_alloc (last_basic_block);

#define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index))
#define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index))

  bitmap_clear (visited);

  MARK_VISITED (ENTRY_BLOCK_PTR);
  FOR_EACH_BB (x)
    {
      if (VISITED_P (x))
        continue;

      /* Walk the predecessors of x as long as they have precisely one
         predecessor and add them to the list, so that they get stored
         after x.  */
      for (y = x, np = 1;
           single_pred_p (y) && !VISITED_P (single_pred (y));
           y = single_pred (y))
        np++;
      for (y = x, i = n - np;
           single_pred_p (y) && !VISITED_P (single_pred (y));
           y = single_pred (y), i++)
        {
          order[i] = y;
          MARK_VISITED (y);
        }
      order[i] = y;
      MARK_VISITED (y);

      gcc_assert (i == n - 1);
      n -= np;
    }

  sbitmap_free (visited);
  gcc_assert (n == 0);
  return order;

#undef MARK_VISITED
#undef VISITED_P
}
/* Replace PHI node element whose edge is E in block BB with variable NEW.
   Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
   is known to have two edges, one of which must reach BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
                                edge e, gimple phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
        &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
             cond_block->index,
             bb->index);
}
/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
                         edge e0, edge e1, gimple phi,
                         tree arg0, tree arg1)
{
  tree result;
  gimple stmt, new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
        || POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
           || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments have the constants 0 and 1, or 0 and -1, then
     convert it to the conditional.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
           || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge as the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
                          gimple_cond_code (stmt), boolean_type_node,
                          gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
                            TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
                               TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
                              NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
                                      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result), NULL);
      new_stmt = gimple_build_assign_with_ops (CONVERT_EXPR, new_var2,
                                               new_var, NULL);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
        locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}
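
/* Illustrative (hypothetical) example of the negated-constant case
   handled above: for

     x = c ? -1 : 0;

   conditional_replacement emits the equivalent of

     x = -(typeof (x)) c;

   i.e. the condition is converted to the PHI's type and negated, so -1
   is produced exactly when the condition holds.  */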
/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */

static bool
jump_function_from_stmt (tree *arg, gimple stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      HOST_WIDE_INT offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
                                                &offset);
      if (tem
          && TREE_CODE (tem) == MEM_REF
          && (mem_ref_offset (tem) + double_int::from_shwi (offset)).is_zero ())
        {
          *arg = TREE_OPERAND (tem, 0);
          return true;
        }
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}
/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise return
   0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
                   edge e0, edge e1, gimple phi,
                   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_after_labels (middle_bb);
  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
    gsi_next_nondebug (&gsi);
  while (!gsi_end_p (gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
        {
          empty_or_with_defined_p = false;
          continue;
        }
      /* Now try to adjust arg0 or arg1 according to the computation
         in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
            && jump_function_from_stmt (&arg0, stmt))
          && !(lhs == arg1
               && jump_function_from_stmt (&arg1, stmt)))
        empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument goes with which edge.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if ((operand_equal_for_phi_arg_p (arg0, gimple_cond_lhs (cond))
       && operand_equal_for_phi_arg_p (arg1, gimple_cond_rhs (cond)))
      || (operand_equal_for_phi_arg_p (arg1, gimple_cond_lhs (cond))
          && operand_equal_for_phi_arg_p (arg0, gimple_cond_rhs (cond))))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
         arg is the PHI argument associated with the true edge.  For
         EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
         OTHER_BLOCK).  If that is the case, then we want the single outgoing
         edge from OTHER_BLOCK which reaches BB and represents the desired
         path from COND_BLOCK.  */
      if (e->dest == middle_bb)
        e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
         RHS of our new assignment statement.  */
      if (e0 == e)
        arg = arg0;
      else
        arg = arg1;

      /* If the middle basic block was empty or is defining the
         PHI arguments and this is a single phi where the args are different
         for the edges e0 and e1 then we can remove the middle basic block.  */
      if (empty_or_with_defined_p
          && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
                                                 e0, e1) == phi)
        {
          replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
          /* Note that we optimized this PHI.  */
          return 2;
        }
      else
        {
          /* Replace the PHI arguments with arg.  */
          SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
          SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "PHI ");
              print_generic_expr (dump_file, gimple_phi_result (phi), 0);
              fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
                       cond_bb->index);
              print_generic_expr (dump_file, arg, 0);
              fprintf (dump_file, ".\n");
            }
          return 1;
        }
    }
  return 0;
}
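
/* Illustrative (hypothetical) example: for

     if (a != b)
       x = b;
     else
       x = a;

   both paths yield the same value (on the else path a == b holds), so
   value_replacement reduces the PHI to plain

     x = b;  */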
/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
                    edge e0, edge e1, gimple phi,
                    tree arg0, tree arg1)
{
  tree result, type;
  gimple cond, new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (TYPE_MODE (type)))
    return false;

  cond = last_stmt (cond_bb);
  cmp = gimple_cond_code (cond);

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = gimple_cond_rhs (cond);
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = gimple_cond_rhs (cond);
      larger = gimple_cond_lhs (cond);
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we can tell which PHI argument corresponds to which
     edge.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if (operand_equal_for_phi_arg_p (arg_true, smaller)
          && operand_equal_for_phi_arg_p (arg_false, larger))
        {
          /* Case

             if (smaller < larger)
               rslt = smaller;
             else
               rslt = larger;  */
          minmax = MIN_EXPR;
        }
      else if (operand_equal_for_phi_arg_p (arg_false, smaller)
               && operand_equal_for_phi_arg_p (arg_true, larger))
        minmax = MAX_EXPR;
      else
        return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

         if (a <= u)
           b = MAX (a, d);
         x = PHI <b, u>

         This is equivalent to

         b = MAX (a, d);
         x = MIN (b, u);  */

      gimple assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
          || gimple_code (assign) != GIMPLE_ASSIGN)
        return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
        return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
        {
          /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_true))
            return false;

          if (operand_equal_for_phi_arg_p (arg_false, larger))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MAX_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller))
                bound = op0;
              else
                return false;

              /* We need BOUND <= LARGER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_false, smaller))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MIN_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger))
                bound = op0;
              else
                return false;

              /* We need BOUND >= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }
      else
        {
          /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_false))
            return false;

          if (operand_equal_for_phi_arg_p (arg_true, larger))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MIN_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller))
                bound = op0;
              else
                return false;

              /* We need BOUND >= LARGER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_true, smaller))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MAX_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger))
                bound = op0;
              else
                return false;

              /* We need BOUND <= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Emit the statement to compute min/max.  */
  result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
  new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  return true;
}
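
/* Illustrative (hypothetical) source-level example of the bounded case:

     x = (a < 8) ? MAX (a, 0) : 8;

   Because the bound 0 satisfies 0 <= 8, this is rewritten into

     t = MAX (a, 0);
     x = MIN (t, 8);

   i.e. a branch-free clamp of a into [0, 8].  */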
/* The function abs_replacement does the main work of doing the absolute
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   bb is the basic block where the replacement is going to be done on.  arg0
   is argument 0 from the phi.  Likewise for arg1.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
                 edge e0 ATTRIBUTE_UNUSED, edge e1,
                 gimple phi, tree arg0, tree arg1)
{
  tree result;
  gimple new_stmt, cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  result = duplicate_ssa_name (result, NULL);

  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result), NULL);
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign_with_ops (ABS_EXPR, lhs, rhs, NULL);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
         added ABS_EXPR statement (which we know is the first statement
         in the block).  */
      new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, result, lhs, NULL);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}
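
/* Illustrative (hypothetical) example:

     x = (a >= 0) ? a : -a;

   matches the pattern above and becomes

     x = ABS_EXPR <a>;

   and when the edges select the negated value instead, the extra
   NEGATE_EXPR is emitted to produce -ABS.  */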
/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   simply is a walk over all instructions in dominator order.  When
   we see a MEM_REF we determine if we've already seen the same
   ref anywhere up to the root of the dominator tree.  If we have,
   the current access can't trap.  If we don't see any dominating access
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores; for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */
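
/* Illustrative (hypothetical) example of the dominance argument:

     *p = 1;      // might trap; remembered as a store through p
     if (cond)
       *p = 2;    // dominated by the store above, so it cannot trap

   The second store is therefore a candidate for the conditional store
   replacement below.  */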
/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;
  unsigned int phase;
  bool store;
  HOST_WIDE_INT offset, size;
  basic_block bb;
};

/* The hash table for remembering what we've seen.  */
static htab_t seen_ssa_names;

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The set of MEM_REFs which can't trap.  */
static struct pointer_set_t *nontrap_set;
/* The hash function.  */

static hashval_t
name_to_bb_hash (const void *p)
{
  const struct name_to_bb *n = (const struct name_to_bb *) p;
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
         ^ (n->offset << 6) ^ (n->size << 3);
}

/* The equality function of *P1 and *P2.  */

static int
name_to_bb_eq (const void *p1, const void *p2)
{
  const struct name_to_bb *n1 = (const struct name_to_bb *) p1;
  const struct name_to_bb *n2 = (const struct name_to_bb *) p2;

  return n1->ssa_name_ver == n2->ssa_name_ver
         && n1->store == n2->store
         && n1->offset == n2->offset
         && n1->size == n2->size;
}
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (an MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
static void
add_or_mark_expr (basic_block bb, tree exp,
                  struct pointer_set_t *nontrap, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && host_integerp (TREE_OPERAND (exp, 1), 0)
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      void **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_low_cst (TREE_OPERAND (exp, 1), 0);
      map.size = size;

      slot = htab_find_slot (seen_ssa_names, &map, INSERT);
      n2bb = (struct name_to_bb *) *slot;
      if (n2bb && n2bb->phase >= nt_call_phase)
        found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
         (it's in a basic block on the path from us to the dominator root)
         then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
        {
          pointer_set_insert (nontrap, exp);
        }
      else
        {
          /* EXP might trap, so insert it into the hash table.  */
          if (n2bb)
            {
              n2bb->phase = nt_call_phase;
              n2bb->bb = bb;
            }
          else
            {
              n2bb = XNEW (struct name_to_bb);
              n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
              n2bb->phase = nt_call_phase;
              n2bb->bb = bb;
              n2bb->store = store;
              n2bb->offset = map.offset;
              n2bb->size = size;
              *slot = n2bb;
            }
        }
    }
}
/* Return true when CALL is a call stmt that definitely doesn't
   free any memory or makes it unavailable otherwise.  */
bool
nonfreeing_call_p (gimple call)
{
  if (gimple_call_builtin_p (call, BUILT_IN_NORMAL)
      && gimple_call_flags (call) & ECF_LEAF)
    switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
      {
        /* Just in case these become ECF_LEAF in the future.  */
        case BUILT_IN_FREE:
        case BUILT_IN_TM_FREE:
        case BUILT_IN_REALLOC:
        case BUILT_IN_STACK_RESTORE:
          return false;
        default:
          return true;
      }

  return false;
}
/* Called by walk_dominator_tree, when entering the block BB.  */

static void
nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
        nt_call_phase++;
        break;
      }

  /* Mark this BB as being on the path to dominator root and as visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
        nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
        {
          add_or_mark_expr (bb, gimple_assign_lhs (stmt), nontrap_set, true);
          add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), nontrap_set, false);
        }
    }
}

/* Called by walk_dominator_tree, when basic block BB is exited.  */

static void
nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore.  */
  bb->aux = (void*)2;
}
/* This is the entry point of gathering non trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs themselves) which can't trap.  */
static struct pointer_set_t *
get_non_trapping (void)
{
  struct pointer_set_t *nontrap;
  struct dom_walk_data walk_data;

  nt_call_phase = 0;
  nontrap = pointer_set_create ();
  seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq,
                                free);
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Setup callbacks for the generic dominator tree walker.  */
  nontrap_set = nontrap;
  walk_data.dom_direction = CDI_DOMINATORS;
  walk_data.initialize_block_local_data = NULL;
  walk_data.before_dom_children = nt_init_block;
  walk_data.after_dom_children = nt_fini_block;
  walk_data.global_data = NULL;
  walk_data.block_local_data_size = 0;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);
  htab_delete (seen_ssa_names);

  clear_aux_for_blocks ();
  return nontrap;
}
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via NOTRAP, but via checking if an access to the same
   memory location dominates us) and that the store has a "simple" RHS.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
                        edge e0, edge e1, struct pointer_set_t *nontrap)
{
  gimple assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gimple newphi, new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check that middle_bb contains only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!pointer_set_contains (nontrap, lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* 2) Insert a load from the memory of the store to the temporary
        on the edge which did not contain the store.  */
  lhs = unshare_expr (lhs);
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
/* Do the main work of conditional store replacement.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
                                  basic_block join_bb, gimple then_assign,
                                  gimple else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gimple newphi, new_stmt;

  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument per
        incoming edge holding the corresponding stored RHS value.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  gimple then_assign = last_and_only_stmt (then_bb);
  gimple else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple then_store, else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  vec<gimple> then_stores, else_stores;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                             then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
          == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  then_stores.create (1);
  else_stores.create (1);
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      then_stores.release ();
      else_stores.release ();
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
                                vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
                                   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      then_stores.release ();
      else_stores.release ();
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write dependencies
     in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          then_stores.release ();
          else_stores.release ();
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write dependencies
     in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          then_stores.release ();
          else_stores.release ();
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);
  then_stores.release ();
  else_stores.release ();

  return ok;
}
/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */

static bool
local_mem_dependence (gimple stmt, basic_block bb)
{
  tree vuse = gimple_vuse (stmt);
  gimple def;

  if (!vuse)
    return false;

  def = SSA_NAME_DEF_STMT (vuse);
  return (def && gimple_bb (def) == bb);
}
/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed to not cross a page boundary.

   The last is difficult to prove, as such memory blocks should be
   aligned on the minimum of the stack alignment boundary and the
   alignment guaranteed by heap allocation interfaces.  Thus we rely
   on a parameter for the alignment value.

   Provided a good value is used for the last case, the first
   restriction could possibly be relaxed.  */
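
/* Illustrative (hypothetical) example of a qualifying layout:

     struct node { struct node *left, *right; };
     ...
     x = c ? n->left : n->right;

   left and right are strictly adjacent fields of the same structure,
   so both loads can be hoisted above the branch when they fit within
   one cache line, enabling a conditional move.  */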
static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
                      basic_block bb2, basic_block bb3)
{
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gimple_stmt_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi_stmt = gsi_stmt (gsi);
      gimple def1, def2, defswap;
      tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
          || virtual_operand_p (gimple_phi_result (phi_stmt)))
        continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
          || TREE_CODE (arg2) != SSA_NAME
          || SSA_NAME_IS_DEFAULT_DEF (arg1)
          || SSA_NAME_IS_DEFAULT_DEF (arg2))
        continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
          && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
        continue;

      /* Check the mode of the arguments to be sure a conditional move
         can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
          == CODE_FOR_nothing)
        continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
          || !gimple_assign_single_p (def2)
          || gimple_has_volatile_ops (def1)
          || gimple_has_volatile_ops (def2))
        continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
          || TREE_CODE (ref2) != COMPONENT_REF)
        continue;

      /* The zeroth operand of the two component references must be
         identical.  It is not sufficient to compare get_base_address of
         the two references, because this could allow for different
         elements of the same array in the two trees.  It is not safe to
         assume that the existence of one array element implies the
         existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
        continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.  */
      for (next = DECL_CHAIN (field1);
           next && TREE_CODE (next) != FIELD_DECL;
           next = DECL_CHAIN (next))
        ;

      if (next != field2)
        {
          for (next = DECL_CHAIN (field2);
               next && TREE_CODE (next) != FIELD_DECL;
               next = DECL_CHAIN (next))
            ;

          if (next != field1)
            continue;

          fieldswap = field1;
          field1 = field2;
          field2 = fieldswap;
          defswap = def1;
          def1 = def2;
          def2 = defswap;
        }

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!host_integerp (tree_offset1, 1)
          || !host_integerp (tree_offset2, 1)
          || !host_integerp (tree_size2, 1))
        continue;

      offset1 = TREE_INT_CST_LOW (tree_offset1);
      offset2 = TREE_INT_CST_LOW (tree_offset2);
      size2 = TREE_INT_CST_LOW (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      if (offset1 % BITS_PER_UNIT != 0)
        continue;

      /* For profitability, the two field references should fit within
         a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
        continue;

      /* The two expressions cannot be dependent upon vdefs defined
         in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
          || local_mem_dependence (def2, bb_for_def2))
        continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
         bb0.  We hoist the first one first so that a cache miss is handled
         efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "\nHoisting adjacent loads from %d and %d into %d: \n",
                   bb_for_def1->index, bb_for_def2->index, bb0->index);
          print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
          print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
        }
    }
}
/* Determine whether we should attempt to hoist adjacent loads out of
   diamond patterns in pass_phiopt.  Always hoist loads if
   -fhoist-adjacent-loads is specified and the target machine has
   both a conditional move instruction and a defined cache line size.  */

static bool
gate_hoist_loads (void)
{
  return (flag_hoist_adjacent_loads == 1
          && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
          && HAVE_conditional_move);
}
/* Always do these optimizations if we have SSA
   trees to work on.  */
static bool
gate_phiopt (void)
{
  return 1;
}

struct gimple_opt_pass pass_phiopt =
{
 {
  GIMPLE_PASS,
  "phiopt",                             /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_phiopt,                          /* gate */
  tree_ssa_phiopt,                      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_PHIOPT,                       /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa
    | TODO_verify_flow
    | TODO_verify_stmts                 /* todo_flags_finish */
 }
};

static bool
gate_cselim (void)
{
  return flag_tree_cselim;
}

struct gimple_opt_pass pass_cselim =
{
 {
  GIMPLE_PASS,
  "cselim",                             /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_cselim,                          /* gate */
  tree_ssa_cs_elim,                     /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_PHIOPT,                       /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa
    | TODO_verify_flow
    | TODO_verify_stmts                 /* todo_flags_finish */
 }
};