* config/visium/visium.c (visium_gimplify_va_arg): Emit a big-endian
[official-gcc.git] / gcc / shrink-wrap.c
blob7285775e5e000198817456c84c235af8094ed349
1 /* Shrink-wrapping related optimizations.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file handles shrink-wrapping related optimizations. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "output.h"
36 #include "tree-pass.h"
37 #include "cfgrtl.h"
38 #include "cfgbuild.h"
39 #include "params.h"
40 #include "bb-reorder.h"
41 #include "shrink-wrap.h"
42 #include "regcprop.h"
43 #include "rtl-iter.h"
44 #include "valtrack.h"
47 /* Return true if INSN requires the stack frame to be set up.
48 PROLOGUE_USED contains the hard registers used in the function
49 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
50 prologue to set up for the function. */
51 bool
52 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
53 HARD_REG_SET set_up_by_prologue)
55 df_ref def, use;
56 HARD_REG_SET hardregs;
57 unsigned regno;
59 if (CALL_P (insn))
60 return !SIBLING_CALL_P (insn);
62 /* We need a frame to get the unique CFA expected by the unwinder. */
63 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
64 return true;
66 CLEAR_HARD_REG_SET (hardregs);
67 FOR_EACH_INSN_DEF (def, insn)
69 rtx dreg = DF_REF_REG (def);
71 if (!REG_P (dreg))
72 continue;
74 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76 if (hard_reg_set_intersect_p (hardregs, prologue_used))
77 return true;
78 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
79 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
80 if (TEST_HARD_REG_BIT (hardregs, regno)
81 && df_regs_ever_live_p (regno))
82 return true;
84 FOR_EACH_INSN_USE (use, insn)
86 rtx reg = DF_REF_REG (use);
88 if (!REG_P (reg))
89 continue;
91 add_to_hard_reg_set (&hardregs, GET_MODE (reg),
92 REGNO (reg));
94 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
95 return true;
97 return false;
100 /* See whether there has a single live edge from BB, which dest uses
101 [REGNO, END_REGNO). Return the live edge if its dest bb has
102 one or two predecessors. Otherwise return NULL. */
104 static edge
105 live_edge_for_reg (basic_block bb, int regno, int end_regno)
107 edge e, live_edge;
108 edge_iterator ei;
109 bitmap live;
110 int i;
112 live_edge = NULL;
113 FOR_EACH_EDGE (e, ei, bb->succs)
115 live = df_get_live_in (e->dest);
116 for (i = regno; i < end_regno; i++)
117 if (REGNO_REG_SET_P (live, i))
119 if (live_edge && live_edge != e)
120 return NULL;
121 live_edge = e;
125 /* We can sometimes encounter dead code. Don't try to move it
126 into the exit block. */
127 if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
128 return NULL;
130 /* Reject targets of abnormal edges. This is needed for correctness
131 on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
132 exception edges even though it is generally treated as call-saved
133 for the majority of the compilation. Moving across abnormal edges
134 isn't going to be interesting for shrink-wrap usage anyway. */
135 if (live_edge->flags & EDGE_ABNORMAL)
136 return NULL;
138 /* When live_edge->dest->preds == 2, we can create a new block on
139 the edge to make it meet the requirement. */
140 if (EDGE_COUNT (live_edge->dest->preds) > 2)
141 return NULL;
143 return live_edge;
146 /* Try to move INSN from BB to a successor. Return true on success.
147 USES and DEFS are the set of registers that are used and defined
148 after INSN in BB. SPLIT_P indicates whether a live edge from BB
149 is splitted or not. */
151 static bool
152 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
153 const HARD_REG_SET uses,
154 const HARD_REG_SET defs,
155 bool *split_p,
156 struct dead_debug_local *debug)
158 rtx set, src, dest;
159 bitmap live_out, live_in, bb_uses, bb_defs;
160 unsigned int i, dregno, end_dregno;
161 unsigned int sregno = FIRST_PSEUDO_REGISTER;
162 unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
163 basic_block next_block;
164 edge live_edge;
165 rtx_insn *dinsn;
166 df_ref def;
168 /* Look for a simple register assignment. We don't use single_set here
169 because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
170 destinations. */
171 if (!INSN_P (insn))
172 return false;
173 set = PATTERN (insn);
174 if (GET_CODE (set) != SET)
175 return false;
176 src = SET_SRC (set);
177 dest = SET_DEST (set);
179 /* For the destination, we want only a register. Also disallow STACK
180 or FRAME related adjustments. They are likely part of the prologue,
181 so keep them in the entry block. */
182 if (!REG_P (dest)
183 || dest == stack_pointer_rtx
184 || dest == frame_pointer_rtx
185 || dest == hard_frame_pointer_rtx)
186 return false;
188 /* For the source, we want one of:
189 (1) A (non-overlapping) register
190 (2) A constant,
191 (3) An expression involving no more than one register.
193 That last point comes from the code following, which was originally
194 written to handle only register move operations, and still only handles
195 a single source register when checking for overlaps. Happily, the
196 same checks can be applied to expressions like (plus reg const). */
198 if (CONSTANT_P (src))
200 else if (!REG_P (src))
202 rtx src_inner = NULL_RTX;
204 if (can_throw_internal (insn))
205 return false;
207 subrtx_var_iterator::array_type array;
208 FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
210 rtx x = *iter;
211 switch (GET_RTX_CLASS (GET_CODE (x)))
213 case RTX_CONST_OBJ:
214 case RTX_COMPARE:
215 case RTX_COMM_COMPARE:
216 case RTX_BIN_ARITH:
217 case RTX_COMM_ARITH:
218 case RTX_UNARY:
219 case RTX_TERNARY:
220 /* Constant or expression. Continue. */
221 break;
223 case RTX_OBJ:
224 case RTX_EXTRA:
225 switch (GET_CODE (x))
227 case UNSPEC:
228 case SUBREG:
229 case STRICT_LOW_PART:
230 case PC:
231 case LO_SUM:
232 /* Ok. Continue. */
233 break;
235 case REG:
236 /* Fail if we see a second inner register. */
237 if (src_inner != NULL)
238 return false;
239 src_inner = x;
240 break;
242 default:
243 return false;
245 break;
247 default:
248 return false;
252 if (src_inner != NULL)
253 src = src_inner;
256 /* Make sure that the source register isn't defined later in BB. */
257 if (REG_P (src))
259 sregno = REGNO (src);
260 end_sregno = END_REGNO (src);
261 if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
262 return false;
265 /* Make sure that the destination register isn't referenced later in BB. */
266 dregno = REGNO (dest);
267 end_dregno = END_REGNO (dest);
268 if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
269 || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
270 return false;
272 /* See whether there is a successor block to which we could move INSN. */
273 live_edge = live_edge_for_reg (bb, dregno, end_dregno);
274 if (!live_edge)
275 return false;
277 next_block = live_edge->dest;
278 /* Create a new basic block on the edge. */
279 if (EDGE_COUNT (next_block->preds) == 2)
281 /* split_edge for a block with only one successor is meaningless. */
282 if (EDGE_COUNT (bb->succs) == 1)
283 return false;
285 /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
286 if (!df_live)
287 return false;
289 basic_block old_dest = live_edge->dest;
290 next_block = split_edge (live_edge);
292 /* We create a new basic block. Call df_grow_bb_info to make sure
293 all data structures are allocated. */
294 df_grow_bb_info (df_live);
296 bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
297 df_get_live_in (old_dest));
298 df_set_bb_dirty (next_block);
300 /* We should not split more than once for a function. */
301 if (*split_p)
302 return false;
304 *split_p = true;
307 /* At this point we are committed to moving INSN, but let's try to
308 move it as far as we can. */
311 if (MAY_HAVE_DEBUG_INSNS)
313 FOR_BB_INSNS_REVERSE (bb, dinsn)
314 if (DEBUG_INSN_P (dinsn))
316 df_ref use;
317 FOR_EACH_INSN_USE (use, dinsn)
318 if (refers_to_regno_p (dregno, end_dregno,
319 DF_REF_REG (use), (rtx *) NULL))
320 dead_debug_add (debug, use, DF_REF_REGNO (use));
322 else if (dinsn == insn)
323 break;
325 live_out = df_get_live_out (bb);
326 live_in = df_get_live_in (next_block);
327 bb = next_block;
329 /* Check whether BB uses DEST or clobbers DEST. We need to add
330 INSN to BB if so. Either way, DEST is no longer live on entry,
331 except for any part that overlaps SRC (next loop). */
332 bb_uses = &DF_LR_BB_INFO (bb)->use;
333 bb_defs = &DF_LR_BB_INFO (bb)->def;
334 if (df_live)
336 for (i = dregno; i < end_dregno; i++)
338 if (*split_p
339 || REGNO_REG_SET_P (bb_uses, i)
340 || REGNO_REG_SET_P (bb_defs, i)
341 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
342 next_block = NULL;
343 CLEAR_REGNO_REG_SET (live_out, i);
344 CLEAR_REGNO_REG_SET (live_in, i);
347 /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
348 Either way, SRC is now live on entry. */
349 for (i = sregno; i < end_sregno; i++)
351 if (*split_p
352 || REGNO_REG_SET_P (bb_defs, i)
353 || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
354 next_block = NULL;
355 SET_REGNO_REG_SET (live_out, i);
356 SET_REGNO_REG_SET (live_in, i);
359 else
361 /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
362 DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
363 at -O1, just give up searching NEXT_BLOCK. */
364 next_block = NULL;
365 for (i = dregno; i < end_dregno; i++)
367 CLEAR_REGNO_REG_SET (live_out, i);
368 CLEAR_REGNO_REG_SET (live_in, i);
371 for (i = sregno; i < end_sregno; i++)
373 SET_REGNO_REG_SET (live_out, i);
374 SET_REGNO_REG_SET (live_in, i);
378 /* If we don't need to add the move to BB, look for a single
379 successor block. */
380 if (next_block)
382 live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
383 if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
384 break;
385 next_block = live_edge->dest;
388 while (next_block);
390 /* For the new created basic block, there is no dataflow info at all.
391 So skip the following dataflow update and check. */
392 if (!(*split_p))
394 /* BB now defines DEST. It only uses the parts of DEST that overlap SRC
395 (next loop). */
396 for (i = dregno; i < end_dregno; i++)
398 CLEAR_REGNO_REG_SET (bb_uses, i);
399 SET_REGNO_REG_SET (bb_defs, i);
402 /* BB now uses SRC. */
403 for (i = sregno; i < end_sregno; i++)
404 SET_REGNO_REG_SET (bb_uses, i);
407 /* Insert debug temps for dead REGs used in subsequent debug insns. */
408 if (debug->used && !bitmap_empty_p (debug->used))
409 FOR_EACH_INSN_DEF (def, insn)
410 dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
411 DEBUG_TEMP_BEFORE_WITH_VALUE);
413 emit_insn_after (PATTERN (insn), bb_note (bb));
414 delete_insn (insn);
415 return true;
418 /* Look for register copies in the first block of the function, and move
419 them down into successor blocks if the register is used only on one
420 path. This exposes more opportunities for shrink-wrapping. These
421 kinds of sets often occur when incoming argument registers are moved
422 to call-saved registers because their values are live across one or
423 more calls during the function. */
425 static void
426 prepare_shrink_wrap (basic_block entry_block)
428 rtx_insn *insn, *curr;
429 rtx x;
430 HARD_REG_SET uses, defs;
431 df_ref def, use;
432 bool split_p = false;
433 unsigned int i;
434 struct dead_debug_local debug;
436 if (JUMP_P (BB_END (entry_block)))
438 /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
439 to sink the copies from parameter to callee saved register out of
440 entry block. copyprop_hardreg_forward_bb_without_debug_insn is called
441 to release some dependences. */
442 copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
445 dead_debug_local_init (&debug, NULL, NULL);
446 CLEAR_HARD_REG_SET (uses);
447 CLEAR_HARD_REG_SET (defs);
449 FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
450 if (NONDEBUG_INSN_P (insn)
451 && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
452 &split_p, &debug))
454 /* Add all defined registers to DEFs. */
455 FOR_EACH_INSN_DEF (def, insn)
457 x = DF_REF_REG (def);
458 if (REG_P (x) && HARD_REGISTER_P (x))
459 for (i = REGNO (x); i < END_REGNO (x); i++)
460 SET_HARD_REG_BIT (defs, i);
463 /* Add all used registers to USESs. */
464 FOR_EACH_INSN_USE (use, insn)
466 x = DF_REF_REG (use);
467 if (REG_P (x) && HARD_REGISTER_P (x))
468 for (i = REGNO (x); i < END_REGNO (x); i++)
469 SET_HARD_REG_BIT (uses, i);
473 dead_debug_local_finish (&debug, NULL);
476 /* Return whether basic block PRO can get the prologue. It can not if it
477 has incoming complex edges that need a prologue inserted (we make a new
478 block for the prologue, so those edges would need to be redirected, which
479 does not work). It also can not if there exist registers live on entry
480 to PRO that are clobbered by the prologue. */
482 static bool
483 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
485 edge e;
486 edge_iterator ei;
487 FOR_EACH_EDGE (e, ei, pro->preds)
488 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
489 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
490 return false;
492 HARD_REG_SET live;
493 REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
494 if (hard_reg_set_intersect_p (live, prologue_clobbered))
495 return false;
497 return true;
500 /* Return whether we can duplicate basic block BB for shrink wrapping. We
501 cannot if the block cannot be duplicated at all, or if any of its incoming
502 edges are complex and come from a block that does not require a prologue
503 (we cannot redirect such edges), or if the block is too big to copy.
504 PRO is the basic block before which we would put the prologue, MAX_SIZE is
505 the maximum size block we allow to be copied. */
507 static bool
508 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
510 if (!can_duplicate_block_p (bb))
511 return false;
513 edge e;
514 edge_iterator ei;
515 FOR_EACH_EDGE (e, ei, bb->preds)
516 if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
517 && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
518 return false;
520 unsigned size = 0;
522 rtx_insn *insn;
523 FOR_BB_INSNS (bb, insn)
524 if (NONDEBUG_INSN_P (insn))
526 size += get_attr_min_length (insn);
527 if (size > max_size)
528 return false;
531 return true;
534 /* Do whatever needs to be done for exits that run without prologue.
535 Sibcalls need nothing done. Normal exits get a simple_return inserted. */
537 static void
538 handle_simple_exit (edge e)
541 if (e->flags & EDGE_SIBCALL)
543 /* Tell function.c to take no further action on this edge. */
544 e->flags |= EDGE_IGNORE;
546 e->flags &= ~EDGE_FALLTHRU;
547 emit_barrier_after_bb (e->src);
548 return;
551 /* If the basic block the edge comes from has multiple successors,
552 split the edge. */
553 if (EDGE_COUNT (e->src->succs) > 1)
555 basic_block old_bb = e->src;
556 rtx_insn *end = BB_END (old_bb);
557 rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
558 basic_block new_bb = create_basic_block (note, note, old_bb);
559 BB_COPY_PARTITION (new_bb, old_bb);
560 BB_END (old_bb) = end;
562 redirect_edge_succ (e, new_bb);
563 e->flags |= EDGE_FALLTHRU;
565 e = make_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
568 e->flags &= ~EDGE_FALLTHRU;
569 rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
570 BB_END (e->src));
571 JUMP_LABEL (ret) = simple_return_rtx;
572 emit_barrier_after_bb (e->src);
574 if (dump_file)
575 fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
576 INSN_UID (ret), e->src->index);
579 /* Try to perform a kind of shrink-wrapping, making sure the
580 prologue/epilogue is emitted only around those parts of the
581 function that require it.
583 There will be exactly one prologue, and it will be executed either
584 zero or one time, on any path. Depending on where the prologue is
585 placed, some of the basic blocks can be reached via both paths with
586 and without a prologue. Such blocks will be duplicated here, and the
587 edges changed to match.
589 Paths that go to the exit without going through the prologue will use
590 a simple_return instead of the epilogue. We maximize the number of
591 those, making sure to only duplicate blocks that can be duplicated.
592 If the prologue can then still be placed in multiple locations, we
593 place it as early as possible.
595 An example, where we duplicate blocks with control flow (legend:
596 _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
597 be taken to point down or to the right, to simplify the diagram; here,
598 block 3 needs a prologue, the rest does not):
604 |\ |\
605 | 3 becomes | 3
606 |/ | \
607 4 7 4
608 |\ |\ |\
609 | 5 | 8 | 5
610 |/ |/ |/
611 6 9 6
612 | | |
613 R S R
616 (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
617 edge 2->3).
619 Another example, where part of a loop is duplicated (again, bb 3 is
620 the only block that needs a prologue):
623 B 3<-- B ->3<--
624 | | | | | | |
625 | v | becomes | | v |
626 2---4--- 2---5-- 4---
627 | | |
628 R S R
631 (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
633 ENTRY_EDGE is the edge where the prologue will be placed, possibly
634 changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
636 void
637 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
639 /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
640 no sense to shrink-wrap: then do not shrink-wrap! */
642 if (!SHRINK_WRAPPING_ENABLED)
643 return;
645 if (crtl->profile && !targetm.profile_before_prologue ())
646 return;
648 if (crtl->calls_eh_return)
649 return;
651 bool empty_prologue = true;
652 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
653 if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
655 empty_prologue = false;
656 break;
658 if (empty_prologue)
659 return;
661 /* Move some code down to expose more shrink-wrapping opportunities. */
663 basic_block entry = (*entry_edge)->dest;
664 prepare_shrink_wrap (entry);
666 if (dump_file)
667 fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
669 /* Compute the registers set and used in the prologue. */
671 HARD_REG_SET prologue_clobbered, prologue_used;
672 CLEAR_HARD_REG_SET (prologue_clobbered);
673 CLEAR_HARD_REG_SET (prologue_used);
674 for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
675 if (NONDEBUG_INSN_P (insn))
677 HARD_REG_SET this_used;
678 CLEAR_HARD_REG_SET (this_used);
679 note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
680 AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
681 IOR_HARD_REG_SET (prologue_used, this_used);
682 note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
684 CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
685 if (frame_pointer_needed)
686 CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
688 /* Find out what registers are set up by the prologue; any use of these
689 cannot happen before the prologue. */
691 struct hard_reg_set_container set_up_by_prologue;
692 CLEAR_HARD_REG_SET (set_up_by_prologue.set);
693 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
694 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
695 if (frame_pointer_needed)
696 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
697 HARD_FRAME_POINTER_REGNUM);
698 if (pic_offset_table_rtx
699 && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
700 add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
701 PIC_OFFSET_TABLE_REGNUM);
702 if (crtl->drap_reg)
703 add_to_hard_reg_set (&set_up_by_prologue.set,
704 GET_MODE (crtl->drap_reg),
705 REGNO (crtl->drap_reg));
706 if (targetm.set_up_by_prologue)
707 targetm.set_up_by_prologue (&set_up_by_prologue);
709 /* We will insert the prologue before the basic block PRO. PRO should
710 dominate all basic blocks that need the prologue to be executed
711 before them. First, make PRO the "tightest wrap" possible. */
713 calculate_dominance_info (CDI_DOMINATORS);
715 basic_block pro = 0;
717 basic_block bb;
718 edge e;
719 edge_iterator ei;
720 FOR_EACH_BB_FN (bb, cfun)
722 rtx_insn *insn;
723 FOR_BB_INSNS (bb, insn)
724 if (NONDEBUG_INSN_P (insn)
725 && requires_stack_frame_p (insn, prologue_used,
726 set_up_by_prologue.set))
728 if (dump_file)
729 fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
730 pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
731 break;
735 /* If nothing needs a prologue, just put it at the start. This really
736 shouldn't happen, but we cannot fix it here. */
738 if (pro == 0)
740 if (dump_file)
741 fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
742 "putting it at the start.\n");
743 pro = entry;
746 if (dump_file)
747 fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
748 pro->index);
750 /* Now see if we can put the prologue at the start of PRO. Putting it
751 there might require duplicating a block that cannot be duplicated,
752 or in some cases we cannot insert the prologue there at all. If PRO
753 wont't do, try again with the immediate dominator of PRO, and so on.
755 The blocks that need duplicating are those reachable from PRO but
756 not dominated by it. We keep in BB_WITH a bitmap of the blocks
757 reachable from PRO that we already found, and in VEC a stack of
758 those we still need to consider (to find successors). */
760 bitmap bb_with = BITMAP_ALLOC (NULL);
761 bitmap_set_bit (bb_with, pro->index);
763 vec<basic_block> vec;
764 vec.create (n_basic_blocks_for_fn (cfun));
765 vec.quick_push (pro);
767 unsigned max_grow_size = get_uncond_jump_length ();
768 max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
770 while (!vec.is_empty () && pro != entry)
772 while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
774 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
776 if (bitmap_set_bit (bb_with, pro->index))
777 vec.quick_push (pro);
780 basic_block bb = vec.pop ();
781 if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
782 while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
784 gcc_assert (pro != entry);
786 pro = get_immediate_dominator (CDI_DOMINATORS, pro);
788 if (bitmap_set_bit (bb_with, pro->index))
789 vec.quick_push (pro);
792 FOR_EACH_EDGE (e, ei, bb->succs)
793 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
794 && bitmap_set_bit (bb_with, e->dest->index))
795 vec.quick_push (e->dest);
798 if (dump_file)
799 fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
800 pro->index);
802 /* If we can move PRO back without having to duplicate more blocks, do so.
803 We do this because putting the prologue earlier is better for scheduling.
805 We can move back to a block PRE if every path from PRE will eventually
806 need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
807 to dominate every block reachable from itself. We keep in BB_TMP a
808 bitmap of the blocks reachable from PRE that we already found, and in
809 VEC a stack of those we still need to consider.
811 Any block reachable from PRE is also reachable from all predecessors
812 of PRE, so if we find we need to move PRE back further we can leave
813 everything not considered so far on the stack. Any block dominated
814 by PRE is also dominated by all other dominators of PRE, so anything
815 found good for some PRE does not need to be reconsidered later.
817 We don't need to update BB_WITH because none of the new blocks found
818 can jump to a block that does not need the prologue. */
820 if (pro != entry)
822 calculate_dominance_info (CDI_POST_DOMINATORS);
824 bitmap bb_tmp = BITMAP_ALLOC (NULL);
825 bitmap_copy (bb_tmp, bb_with);
826 basic_block last_ok = pro;
827 vec.truncate (0);
829 while (pro != entry)
831 basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
832 if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
833 break;
835 if (bitmap_set_bit (bb_tmp, pre->index))
836 vec.quick_push (pre);
838 bool ok = true;
839 while (!vec.is_empty ())
841 if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
843 ok = false;
844 break;
847 basic_block bb = vec.pop ();
848 FOR_EACH_EDGE (e, ei, bb->succs)
849 if (bitmap_set_bit (bb_tmp, e->dest->index))
850 vec.quick_push (e->dest);
853 if (ok && can_get_prologue (pre, prologue_clobbered))
854 last_ok = pre;
856 pro = pre;
859 pro = last_ok;
861 BITMAP_FREE (bb_tmp);
862 free_dominance_info (CDI_POST_DOMINATORS);
865 vec.release ();
867 if (dump_file)
868 fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
869 pro->index);
871 if (pro == entry)
873 BITMAP_FREE (bb_with);
874 free_dominance_info (CDI_DOMINATORS);
875 return;
878 /* Compute what fraction of the frequency and count of the blocks that run
879 both with and without prologue are for running with prologue. This gives
880 the correct answer for reducible flow graphs; for irreducible flow graphs
881 our profile is messed up beyond repair anyway. */
883 gcov_type num = 0;
884 gcov_type den = 0;
886 FOR_EACH_EDGE (e, ei, pro->preds)
887 if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
889 num += EDGE_FREQUENCY (e);
890 den += e->src->frequency;
893 if (den == 0)
894 den = 1;
896 /* All is okay, so do it. */
898 crtl->shrink_wrapped = true;
899 if (dump_file)
900 fprintf (dump_file, "Performing shrink-wrapping.\n");
902 /* Copy the blocks that can run both with and without prologue. The
903 originals run with prologue, the copies without. Store a pointer to
904 the copy in the ->aux field of the original. */
906 FOR_EACH_BB_FN (bb, cfun)
907 if (bitmap_bit_p (bb_with, bb->index)
908 && !dominated_by_p (CDI_DOMINATORS, bb, pro))
910 basic_block dup = duplicate_block (bb, 0, 0);
912 bb->aux = dup;
914 if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
915 emit_barrier_after_bb (dup);
917 if (EDGE_COUNT (dup->succs) == 0)
918 emit_barrier_after_bb (dup);
920 if (dump_file)
921 fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
923 bb->frequency = RDIV (num * bb->frequency, den);
924 dup->frequency -= bb->frequency;
925 bb->count = RDIV (num * bb->count, den);
926 dup->count -= bb->count;
929 /* Now change the edges to point to the copies, where appropriate. */
931 FOR_EACH_BB_FN (bb, cfun)
932 if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
934 basic_block src = bb;
935 if (bitmap_bit_p (bb_with, bb->index))
936 src = (basic_block) bb->aux;
938 FOR_EACH_EDGE (e, ei, src->succs)
940 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
941 continue;
943 if (bitmap_bit_p (bb_with, e->dest->index)
944 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
946 if (dump_file)
947 fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
948 e->src->index, e->dest->index,
949 ((basic_block) e->dest->aux)->index);
950 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
952 else if (e->flags & EDGE_FALLTHRU
953 && bitmap_bit_p (bb_with, bb->index))
954 force_nonfallthru (e);
958 /* Also redirect the function entry edge if necessary. */
960 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
961 if (bitmap_bit_p (bb_with, e->dest->index)
962 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
964 basic_block split_bb = split_edge (e);
965 e = single_succ_edge (split_bb);
966 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
969 /* Make a simple_return for those exits that run without prologue. */
971 FOR_EACH_BB_REVERSE_FN (bb, cfun)
972 if (!bitmap_bit_p (bb_with, bb->index))
973 FOR_EACH_EDGE (e, ei, bb->succs)
974 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
975 handle_simple_exit (e);
977 /* Finally, we want a single edge to put the prologue on. Make a new
978 block before the PRO block; the edge beteen them is the edge we want.
979 Then redirect those edges into PRO that come from blocks without the
980 prologue, to point to the new block instead. The new prologue block
981 is put at the end of the insn chain. */
983 basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
984 BB_COPY_PARTITION (new_bb, pro);
985 if (dump_file)
986 fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
988 for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
990 if (bitmap_bit_p (bb_with, e->src->index)
991 || dominated_by_p (CDI_DOMINATORS, e->src, pro))
993 ei_next (&ei);
994 continue;
997 new_bb->count += RDIV (e->src->count * e->probability, REG_BR_PROB_BASE);
998 new_bb->frequency += EDGE_FREQUENCY (e);
1000 redirect_edge_and_branch_force (e, new_bb);
1001 if (dump_file)
1002 fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
1005 *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
1006 force_nonfallthru (*entry_edge);
1008 BITMAP_FREE (bb_with);
1009 free_dominance_info (CDI_DOMINATORS);
1012 /* Separate shrink-wrapping
1014 Instead of putting all of the prologue and epilogue in one spot, we
1015 can put parts of it in places where those components are executed less
1016 frequently. The following code does this, for prologue and epilogue
1017 components that can be put in more than one location, and where those
1018 components can be executed more than once (the epilogue component will
1019 always be executed before the prologue component is executed a second
1020 time).
1022 What exactly is a component is target-dependent. The more usual
1023 components are simple saves/restores to/from the frame of callee-saved
1024 registers. This code treats components abstractly (as an sbitmap),
1025 letting the target handle all details.
1027 Prologue components are placed in such a way that for every component
1028 the prologue is executed as infrequently as possible. We do this by
1029 walking the dominator tree, comparing the cost of placing a prologue
1030 component before a block to the sum of costs determined for all subtrees
1031 of that block.
1033 From this placement, we then determine for each component all blocks
1034 where at least one of this block's dominators (including itself) will
1035 get a prologue inserted. That then is how the components are placed.
1036 We could place the epilogue components a bit smarter (we can save a
1037 bit of code size sometimes); this is a possible future improvement.
1039 Prologues and epilogues are preferably placed into a block, either at
1040 the beginning or end of it, if it is needed for all predecessor resp.
1041 successor edges; or placed on the edge otherwise.
1043 If the placement of any prologue/epilogue leads to a situation we cannot
1044 handle (for example, an abnormal edge would need to be split, or some
1045 targets want to use some specific registers that may not be available
1046 where we want to put them), separate shrink-wrapping for the components
1047 in that prologue/epilogue is aborted. */
1050 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1051 label LABEL. */
1052 static void
1053 dump_components (const char *label, sbitmap components)
1055 if (bitmap_empty_p (components))
1056 return;
1058 fprintf (dump_file, " [%s", label);
1060 for (unsigned int j = 0; j < components->n_bits; j++)
1061 if (bitmap_bit_p (components, j))
1062 fprintf (dump_file, " %u", j);
1064 fprintf (dump_file, "]");
/* The per-basic-block data this subpass collects, hung off BB->aux.
   Allocated in init_separate_shrink_wrap and freed in
   fini_separate_shrink_wrap; access it via the SW() helper.  */
struct sw {
  /* What components does this BB need?  (As reported by the target's
     components_for_bb hook; blocks with no successors need everything.)  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
1095 /* A helper function for accessing the pass-specific info. */
1096 static inline struct sw *
1097 SW (basic_block bb)
1099 gcc_assert (bb->aux);
1100 return (struct sw *) bb->aux;
1103 /* Create the pass-specific data structures for separately shrink-wrapping
1104 with components COMPONENTS. */
1105 static void
1106 init_separate_shrink_wrap (sbitmap components)
1108 basic_block bb;
1109 FOR_ALL_BB_FN (bb, cfun)
1111 bb->aux = xcalloc (1, sizeof (struct sw));
1113 SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1115 /* Mark all basic blocks without successor as needing all components.
1116 This avoids problems in at least cfgcleanup, sel-sched, and
1117 regrename (largely to do with all paths to such a block still
1118 needing the same dwarf CFI info). */
1119 if (EDGE_COUNT (bb->succs) == 0)
1120 bitmap_copy (SW (bb)->needs_components, components);
1122 if (dump_file)
1124 fprintf (dump_file, "bb %d components:", bb->index);
1125 dump_components ("has", SW (bb)->needs_components);
1126 fprintf (dump_file, "\n");
1129 SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1130 SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1131 SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1132 bitmap_clear (SW (bb)->has_components);
1133 bitmap_clear (SW (bb)->head_components);
1134 bitmap_clear (SW (bb)->tail_components);
1138 /* Destroy the pass-specific data. */
1139 static void
1140 fini_separate_shrink_wrap (void)
1142 basic_block bb;
1143 FOR_ALL_BB_FN (bb, cfun)
1144 if (bb->aux)
1146 sbitmap_free (SW (bb)->needs_components);
1147 sbitmap_free (SW (bb)->has_components);
1148 sbitmap_free (SW (bb)->head_components);
1149 sbitmap_free (SW (bb)->tail_components);
1150 free (bb->aux);
1151 bb->aux = 0;
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
         if the block does not have the component itself, walk down.  */
      if (first_visit)
        {
          /* Initialize the cost.  The cost is the block execution frequency
             that does not come from backedges.  Calculating this by simply
             adding the cost of all edges that aren't backedges does not
             work: this does not always add up to the block frequency at
             all, and even if it does, rounding error makes for bad
             decisions.  */
          SW (bb)->own_cost = bb->frequency;

          /* Subtract the frequency of every predecessor edge that is a
             backedge (its source is dominated by BB), saturating at 0.  */
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
              {
                if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
                  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
                else
                  SW (bb)->own_cost = 0;
              }

          SW (bb)->total_cost = 0;

          if (!bitmap_bit_p (SW (bb)->needs_components, which)
              && first_dom_son (CDI_DOMINATORS, bb))
            {
              bb = first_dom_son (CDI_DOMINATORS, bb);
              continue;
            }
        }

      /* If this block does need the component itself, or it is cheaper to
         put the prologue here than in all the descendants that need it,
         mark it so.  If this block's immediate post-dominator is dominated
         by this block, and that needs the prologue, we can put it on this
         block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
          || SW (bb)->total_cost > SW (bb)->own_cost)
        {
          SW (bb)->total_cost = SW (bb)->own_cost;
          bitmap_set_bit (SW (bb)->has_components, which);
        }
      else
        {
          /* KID post-dominates BB; if BB also dominates KID then every
             execution of KID passes through BB, so hoisting KID's
             prologue up to BB does not increase the execution count.  */
          basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
          if (dominated_by_p (CDI_DOMINATORS, kid, bb)
              && bitmap_bit_p (SW (kid)->has_components, which))
            {
              SW (bb)->total_cost = SW (bb)->own_cost;
              bitmap_set_bit (SW (bb)->has_components, which);
            }
        }

      /* We are back where we started, so we are done now.  */
      if (bb == head)
        return;

      /* We now know the cost of the subtree rooted at the current block.
         Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary; once the accumulated
         subtree cost already exceeds the parent's own cost, further
         siblings cannot change the placement decision for the parent.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
        {
          bb = next_dom_son (CDI_DOMINATORS, bb);
          first_visit = true;
        }
      else
        {
          bb = parent;
          first_visit = false;
        }
    }
}
/* Mark HAS_COMPONENTS for every block dominated by at least one block with
   HAS_COMPONENTS set for the respective components, starting at HEAD.
   This is another iterative DFS over the dominator tree; COMPONENTS
   carries the union of components active along the current tree path.  */
static void
spread_components (basic_block head)
{
  basic_block bb = head;
  bool first_visit = true;
  /* This keeps a tally of all components active.  */
  sbitmap components = SW (head)->has_components;

  for (;;)
    {
      if (first_visit)
        {
          /* Everything active in some dominator of BB becomes active
             in BB as well.  */
          bitmap_ior (SW (bb)->has_components, SW (bb)->has_components,
                      components);

          if (first_dom_son (CDI_DOMINATORS, bb))
            {
              components = SW (bb)->has_components;
              bb = first_dom_son (CDI_DOMINATORS, bb);
              continue;
            }
        }

      components = SW (bb)->has_components;

      if (next_dom_son (CDI_DOMINATORS, bb))
        {
          /* Move to the next sibling; the active set for it is the
             parent's, not the sibling we just finished.  */
          bb = next_dom_son (CDI_DOMINATORS, bb);
          basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
          components = SW (parent)->has_components;
          first_visit = true;
        }
      else
        {
          if (bb == head)
            return;
          bb = get_immediate_dominator (CDI_DOMINATORS, bb);
          first_visit = false;
        }
    }
}
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.  The target also gets a chance to
   veto components per edge via the disqualify_components hook.  */
static void
disqualify_problematic_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which components we want pro/epilogues for here: an
             epilogue where the component is active in SRC but not DEST,
             a prologue for the opposite direction.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);

          /* Ask the target what it thinks about things.  */
          if (!bitmap_empty_p (epi))
            targetm.shrink_wrap.disqualify_components (components, e, epi,
                                                       false);
          if (!bitmap_empty_p (pro))
            targetm.shrink_wrap.disqualify_components (components, e, pro,
                                                       true);

          /* If this edge doesn't need splitting, we're fine.  */
          if (single_pred_p (e->dest)
              && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
            continue;

          /* If the edge can be split, that is fine too.  */
          if ((e->flags & EDGE_ABNORMAL) == 0)
            continue;

          /* We also can handle sibcalls.  */
          if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
            {
              gcc_assert (e->flags & EDGE_SIBCALL);
              continue;
            }

          /* Remove from consideration those components we would need
             pro/epilogues for on edges where we cannot insert them.  */
          bitmap_and_compl (components, components, epi);
          bitmap_and_compl (components, components, pro);

          /* Dump only the components actually knocked out: after the
             removal above, EPI is no longer a subset of COMPONENTS
             exactly when it contained some disqualified component.  */
          if (dump_file && !bitmap_subset_p (epi, components))
            {
              fprintf (dump_file, " BAD epi %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("epi", epi);
              fprintf (dump_file, "\n");
            }

          if (dump_file && !bitmap_subset_p (pro, components))
            {
              fprintf (dump_file, " BAD pro %d->%d", e->src->index,
                       e->dest->index);
              if (e->flags & EDGE_EH)
                fprintf (dump_file, " for EH");
              dump_components ("pro", pro);
              fprintf (dump_file, "\n");
            }
        }
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
}
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks: a component's code goes at the
   head of BB only if every (non-abnormal) incoming edge wants it there.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          /* An abnormal edge cannot carry inserted code, so nothing can
             be put at the head of this block.  */
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and (epi, epi, tmp);

          /* Similar, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (pro, pro, tmp);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
        }

      /* The epilogue sequence is also inserted right after the BB note,
         so it ends up *before* the prologue sequence emitted above, i.e.
         epilogue components execute first at the head of the block.  */
      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();

          emit_insn_after (seq, bb_note (bb));

          bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
        }
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
  sbitmap_free (tmp);
}
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks: a component's code goes at the
   tail of BB only if every (non-abnormal) outgoing edge wants it there
   and it was not already placed at the head of the successor.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap tmp = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
         successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
        continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* An abnormal edge rules out placing anything at this tail.  */
          if (e->flags & EDGE_ABNORMAL)
            {
              bitmap_clear (epi);
              bitmap_clear (pro);
              break;
            }

          /* Deselect those epilogue components that should not be inserted
             for this edge, and also those that are already put at the head
             of the successor block.  */
          bitmap_and_compl (tmp, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (epi, epi, tmp);

          /* Similarly, for the prologue.  */
          bitmap_and_compl (tmp, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
          bitmap_and (pro, pro, tmp);
        }

      /* If the last insn of this block is a control flow insn we cannot
         put anything after it.  We can put our code before it instead,
         but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
        {
          bitmap_clear (epi);
          bitmap_clear (pro);
        }

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, " bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
        dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
        dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
        fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.
         Note the epilogue is emitted first here (mirror image of the
         head case): it ends up before the prologue sequence below.  */
      if (!bitmap_empty_p (epi))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_epilogue_components (epi);
          rtx_insn *seq = get_insns ();
          end_sequence ();

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
        }

      if (!bitmap_empty_p (pro))
        {
          start_sequence ();
          targetm.shrink_wrap.emit_prologue_components (pro);
          rtx_insn *seq = get_insns ();
          end_sequence ();

          if (control_flow_insn_p (last_insn))
            emit_insn_before (seq, last_insn);
          else
            emit_insn_after (seq, last_insn);

          bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
        }
    }

  sbitmap_free (pro);
  sbitmap_free (epi);
  sbitmap_free (tmp);
}
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly (as recorded in head_components /
   tail_components).  Sibcall and exit edges need special handling; all
   other inserted code goes through insert_insn_on_edge and is committed
   at the end.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  sbitmap pro = sbitmap_alloc (SBITMAP_SIZE (components));
  sbitmap epi = sbitmap_alloc (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Blocks created by earlier edge splitting have no sw data.  */
      if (!bb->aux)
        continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          /* Find which pro/epilogue components are needed on this edge.  */
          bitmap_and_compl (epi, SW (e->src)->has_components,
                            SW (e->dest)->has_components);
          bitmap_and_compl (pro, SW (e->dest)->has_components,
                            SW (e->src)->has_components);
          bitmap_and (epi, epi, components);
          bitmap_and (pro, pro, components);

          /* Deselect those we already have put at the head or tail of the
             edge's dest resp. src.  */
          bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
          bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
          bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
          bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

          if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
            {
              if (dump_file)
                {
                  fprintf (dump_file, " %d->%d", e->src->index,
                           e->dest->index);
                  dump_components ("epi", epi);
                  dump_components ("pro", pro);
                  fprintf (dump_file, "\n");
                }

              /* Put the epilogue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_epilogue_components (epi);
              rtx_insn *seq = get_insns ();
              end_sequence ();

              if (e->flags & EDGE_SIBCALL)
                {
                  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

                  /* A sibcall edge cannot be split; put the epilogue
                     right before the sibling call instead.  */
                  rtx_insn *insn = BB_END (e->src);
                  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
                  emit_insn_before (seq, insn);
                }
              else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
                {
                  /* Code cannot live on an edge into the exit block;
                     split the edge and put it in the new block.  */
                  gcc_assert (e->flags & EDGE_FALLTHRU);
                  basic_block new_bb = split_edge (e);
                  emit_insn_after (seq, BB_END (new_bb));
                }
              else
                insert_insn_on_edge (seq, e);

              /* Put the prologue components in place.  */
              start_sequence ();
              targetm.shrink_wrap.emit_prologue_components (pro);
              seq = get_insns ();
              end_sequence ();

              insert_insn_on_edge (seq, e);
            }
        }
    }

  sbitmap_free (pro);
  sbitmap_free (epi);

  commit_edge_insertions ();
}
/* The main entry point to this subpass.  FIRST_BB is where the prologue
   would be normally put.  Decides per-component placement, emits the
   pro/epilogue code, tells the target which components were handled,
   and cleans up all pass-local state.  */
void
try_shrink_wrapping_separate (basic_block first_bb)
{
  if (HAVE_cc0)
    return;

  if (!(SHRINK_WRAPPING_ENABLED
        && flag_shrink_wrap_separate
        && optimize_function_for_speed_p (cfun)
        && targetm.shrink_wrap.get_separate_components))
    return;

  /* We don't handle "strange" functions.  */
  if (cfun->calls_alloca
      || cfun->calls_setjmp
      || cfun->can_throw_non_call_exceptions
      || crtl->calls_eh_return
      || crtl->has_nonlocal_goto
      || crtl->saves_all_registers)
    return;

  /* Ask the target what components there are.  If it returns NULL, don't
     do anything.  */
  sbitmap components = targetm.shrink_wrap.get_separate_components ();
  if (!components)
    return;

  /* We need LIVE info.  */
  df_live_add_problem ();
  df_live_set_all_dirty ();
  df_analyze ();

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  init_separate_shrink_wrap (components);

  /* Decide, independently for each component, where its prologue goes.  */
  sbitmap_iterator sbi;
  unsigned int j;
  EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    place_prologue_for_one_component (j, first_bb);

  spread_components (first_bb);

  disqualify_problematic_components (components);

  /* Don't separately shrink-wrap anything where the "main" prologue will
     go; the target code can often optimize things if it is presented with
     all components together (say, if it generates store-multiple insns).  */
  bitmap_and_compl (components, components, SW (first_bb)->has_components);

  if (bitmap_empty_p (components))
    {
      if (dump_file)
        fprintf (dump_file, "Not wrapping anything separately.\n");
    }
  else
    {
      if (dump_file)
        {
          fprintf (dump_file, "The components we wrap separately are");
          dump_components ("sep", components);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "... Inserting common heads...\n");
        }

      emit_common_heads_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting common tails...\n");

      emit_common_tails_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Inserting the more difficult ones...\n");

      insert_prologue_epilogue_for_components (components);

      if (dump_file)
        fprintf (dump_file, "... Done.\n");

      targetm.shrink_wrap.set_handled_components (components);

      crtl->shrink_wrapped_separate = true;
    }

  fini_separate_shrink_wrap ();

  sbitmap_free (components);
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  /* All insns we emitted changed liveness; recompute LIVE info.  */
  if (crtl->shrink_wrapped_separate)
    {
      df_live_set_all_dirty ();
      df_analyze ();
    }
}