/* Shrink-wrapping related optimizations.
   Copyright (C) 1987-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This file handles shrink-wrapping related optimizations.  */
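
/* The idea of shrink-wrapping, shown on a hypothetical example (not
   code from this file; g stands for any external call):

     int
     f (int x)
     {
       if (x == 0)
         return 0;
       return g (x) + x;
     }

   The "x == 0" path needs no saved registers and no stack frame, so the
   prologue (and the matching epilogue) only has to be executed on the
   path that calls g.  The routines below analyze the RTL CFG to find a
   single edge on which the prologue can be placed, sink register copies
   out of the entry block, and duplicate "tail" blocks that are reachable
   both with and without a prologue.  */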
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl-error.h"
#include "tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "stringpool.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "libfuncs.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "recog.h"
#include "output.h"
#include "hashtab.h"
#include "tm_p.h"
#include "langhooks.h"
#include "target.h"
#include "common/common-target.h"
#include "gimple-expr.h"
#include "gimplify.h"
#include "tree-pass.h"
#include "predict.h"
#include "df.h"
#include "params.h"
#include "bb-reorder.h"
#include "shrink-wrap.h"
#include "regcprop.h"
#include "rtl-iter.h"

#ifdef HAVE_simple_return
/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  if (CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
			   REGNO (dreg));
    }
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
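
/* For instance (summarizing the checks above): an ordinary call always
   requires the frame, while a sibling call does not; an insn that sets a
   call-saved hard register which is live somewhere in the function
   requires the frame, because the prologue must save that register; and
   an insn that uses a register the prologue sets up, such as the hard
   frame pointer, requires the frame as well.  */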
/* See whether there is a single live edge from BB whose destination
   uses [REGNO, END_REGNO).  Return the live edge if its dest bb has
   one or two predecessors.  Otherwise return NULL.  */

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge e, live_edge;
  edge_iterator ei;
  bitmap live;
  int i;

  live_edge = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      live = df_get_live_in (e->dest);
      for (i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live, i))
	  {
	    if (live_edge && live_edge != e)
	      return NULL;
	    live_edge = e;
	  }
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (live_edge->flags & EDGE_ABNORMAL)
    return NULL;

  /* When live_edge->dest->preds == 2, we can create a new block on
     the edge to make it meet the requirement.  */
  if (EDGE_COUNT (live_edge->dest->preds) > 2)
    return NULL;

  return live_edge;
}
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once per function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
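
/* A hedged illustration of the transformation above (the register names
   are only an example): if the entry block contains

     (set (reg:DI bx) (reg:DI di))

   to preserve an incoming argument across a later call, and bx is live
   into only one successor, the set can be sunk along that edge.  The
   paths that never needed bx then no longer force the prologue to save
   it, which gives the shrink-wrapping pass below more freedom.  */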
/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap
	 tries to sink copies from parameter registers to callee-saved
	 registers out of the entry block.
	 copyprop_hardreg_forward_bb_without_debug_insn is called to
	 release some dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p))
      {
	/* Add all defined registers to DEFS.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (defs, REGNO (x));
	  }

	/* Add all used registers to USES.  */
	FOR_EACH_INSN_USE (use, insn)
	  {
	    x = DF_REF_REG (use);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      SET_HARD_REG_BIT (uses, REGNO (x));
	  }
      }
}
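
/* A source-level sketch of the pattern prepare_shrink_wrap targets
   (illustrative only; g stands for any call):

     int
     f (int x)
     {
       if (x < 0)
	 return -1;
       return g (g (x));
     }

   Because x is live across the first call to g, its incoming argument
   register is typically copied into a call-saved register at the top of
   the function.  Sinking that copy out of the entry block lets the
   "x < 0" path run without a prologue.  */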
/* Create a copy of BB instructions and insert at BEFORE.  Redirect
   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
void
dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
			bitmap_head *need_prologue)
{
  edge_iterator ei;
  edge e;
  rtx_insn *insn = BB_END (bb);

  /* We know BB has a single successor, so there is no need to copy a
     simple jump at the end of BB.  */
  if (simplejump_p (insn))
    insn = PREV_INSN (insn);

  start_sequence ();
  duplicate_insn_chain (BB_HEAD (bb), insn);
  if (dump_file)
    {
      unsigned count = 0;
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if (active_insn_p (insn))
	  ++count;
      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
	       bb->index, copy_bb->index, count);
    }
  insn = get_insns ();
  end_sequence ();
  emit_insn_before (insn, before);

  /* Redirect all the paths that need no prologue into copy_bb.  */
  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
    if (!bitmap_bit_p (need_prologue, e->src->index))
      {
	int freq = EDGE_FREQUENCY (e);
	copy_bb->count += e->count;
	copy_bb->frequency += EDGE_FREQUENCY (e);
	e->dest->count -= e->count;
	if (e->dest->count < 0)
	  e->dest->count = 0;
	e->dest->frequency -= freq;
	if (e->dest->frequency < 0)
	  e->dest->frequency = 0;
	redirect_edge_and_branch_force (e, copy_bb);
	continue;
      }
    else
      ei_next (&ei);
}
/* Try to perform a kind of shrink-wrapping, making sure the
   prologue/epilogue is emitted only around those parts of the
   function that require it.  */
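
/* In outline, the function below:
     - records which hard registers the prologue uses and clobbers;
     - calls prepare_shrink_wrap to sink copies out of the entry block;
     - marks every block for which requires_stack_frame_p holds, plus
       everything reachable from such a block, as needing the prologue;
     - collects duplicatable "tail" blocks that lead to the exit and do
       not need the prologue;
     - computes the set of blocks from which every path reaches a block
       needing the prologue (bb_antic_flags) and looks for exactly one
       edge entering that set, which becomes the new prologue location;
     - gives up if the prologue clobbers a register live on that edge;
     - finally duplicates the shared tail blocks so that no-prologue
       paths can reach the exit without passing through wrapped code.  */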
void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}
      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is a predecessor of e->src that doesn't
		   need a prologue and the edge is complex,
		   we might not be able to redirect the branch
		   to a copy of e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; they can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) that is live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		rtx_insn *insert_point;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		insert_point = emit_note_after (NOTE_INSN_DELETED,
						BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}
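
/* The remaining routines deal with the target's simple_return pattern:
   a return that performs no epilogue work, which is valid on paths that
   never executed the prologue.  (Illustration: on such a path a bare
   (simple_return) jump can be emitted instead of falling through to the
   full epilogue.)  */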
/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.  If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
	 (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
	      = convert_jumps_to_returns (e->src, true,
					  *unconverted_simple_returns);
	}
    }

  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}
/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.  */

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      basic_block bb;
	      rtx_insn *start;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      start = emit_jump_insn_after (gen_simple_return (),
					    BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}
#endif