/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This file handles shrink-wrapping related optimizations.  */
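/* An illustrative sketch of the idea (not target-specific): for a function
   shaped like

	int f (int a)
	{
	  if (a == 0)
	    return 0;
	  return g (a) + 1;
	}

   only the path through the call to g needs a stack frame.  Without
   shrink-wrapping the prologue is emitted in the entry block, so the
   early-return path pays for frame setup it never uses.  With
   shrink-wrapping the prologue is instead inserted on the edge leading
   into the region that actually requires it, and the early-return path
   ends in a simple_return with no frame setup at all.  */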
#include "coretypes.h"
#include "rtl-error.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "langhooks.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "tree-pass.h"
#include "dominance.h"
#include "basic-block.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg), REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
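/* For example, an insn that sets a call-saved hard register which is live
   somewhere in the function requires the frame, because the prologue must
   save that register; so does any insn that reads a register the prologue
   sets up, such as the hard frame pointer.  A move between call-clobbered
   argument registers typically needs nothing from the prologue and does
   not force a frame by itself.  */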
/* See whether there is a single live edge out of BB whose destination
   uses [REGNO, END_REGNO).  Return the live edge if its dest bb has
   one or two predecessors.  Otherwise return NULL.  */
static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}
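/* For instance, if BB ends in a conditional branch and the register is
   live into only one of the two successors, that successor's edge is the
   live edge and a defining insn can potentially be sunk across it.  If
   the register is live into both successors, or the only live successor
   is reached through an abnormal edge, there is no usable edge and NULL
   is returned.  */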
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */
static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!*split_p)
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
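/* A typical candidate, as an illustration on an x86-64-like target where
   the first argument arrives in a call-clobbered register: the entry block
   may contain a copy such as

	(set (reg:DI 3 bx) (reg:DI 5 di))

   saving the argument into a call-saved register for use after later calls.
   If bx is live into only one successor, and nothing after the copy in the
   entry block redefines di or references bx, the copy is re-emitted at the
   head of that successor and deleted here, so the entry block no longer
   contains an insn that forces the stack frame.  */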
/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */
static void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
	 to sink the copies from parameter registers to callee-saved registers
	 out of the entry block.  copyprop_hardreg_forward_bb_without_debug_insn
	 is called to release some dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p))
      {
	/* Add all defined registers to DEFS.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (defs, REGNO (x));
	  }

	/* Add all used registers to USES.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (uses, REGNO (x));
	  }
      }
}
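/* Note that the reverse walk above matters: insns are visited from the end
   of the entry block backwards, so by the time an insn is considered, USES
   and DEFS already describe every insn that follows it in the block, which
   is exactly the information move_insn_for_shrink_wrap needs to prove that
   the move is safe.  */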
/* Create a copy of BB instructions and insert at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
static void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
			bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
	       bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
	int freq = EDGE_FREQUENCY (e);
	copy_bb->count += e->count;
	copy_bb->frequency += EDGE_FREQUENCY (e);
	e->dest->count -= e->count;
	if (e->dest->count < 0)
	  e->dest->count = 0;
	e->dest->frequency -= freq;
	if (e->dest->frequency < 0)
	  e->dest->frequency = 0;
	redirect_edge_and_branch_force (e, copy_bb);
	continue;
      }
    else
      ei_next (&ei);
}
/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */
void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is a predecessor of e->src which doesn't
		   need a prologue and the edge is complex,
		   we might not be able to redirect the branch
		   to a copy of e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; they can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) which are live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED,
							  BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
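/* To summarize the effect: on success, *ENTRY_EDGE is the unique edge from
   a block that does not need the prologue into the region that does, and
   the caller inserts the prologue on that edge rather than in the entry
   block.  Tail blocks reachable both with and without a prologue have been
   duplicated, and the prologue-free copies reach the exit block through
   EDGE_FAKE edges that are later turned into simple_return paths.  */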
/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */
edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
	 (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
	      = convert_jumps_to_returns (e->src, true,
					  *unconverted_simple_returns);
	}
    }

  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}
/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */
void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      basic_block bb;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      rtx_jump_insn *start = emit_jump_insn_after (gen_simple_return (),
							   BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}
->flags
&= ~(EDGE_FALLTHRU
| EDGE_FAKE
);