[official-gcc.git] / gcc / reorg.c
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2013 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 a conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets CC0 and the one that uses it. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
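
/* Editor's illustration (not part of the original file): on a MIPS-like
   machine with a single delay slot, `fill_simple_delay_slots' can move an
   independent insn from before the branch into the slot, removing the nop
   that would otherwise occupy it:

        (before filling)                (after filling)
        lw    $4,0($5)                  beq   $2,$0,L1
        beq   $2,$0,L1                  lw    $4,0($5)    ; now in the slot
        nop                         L1: ...
   L1:  ...

   The move is valid here only because the load does not feed the branch
   condition and is safe to execute whether or not the branch is taken.  */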
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "tm.h"
107 #include "diagnostic-core.h"
108 #include "rtl.h"
109 #include "tm_p.h"
110 #include "expr.h"
111 #include "function.h"
112 #include "insn-config.h"
113 #include "conditions.h"
114 #include "hard-reg-set.h"
115 #include "basic-block.h"
116 #include "regs.h"
117 #include "recog.h"
118 #include "flags.h"
119 #include "obstack.h"
120 #include "insn-attr.h"
121 #include "resource.h"
122 #include "except.h"
123 #include "params.h"
124 #include "target.h"
125 #include "tree-pass.h"
126 #include "emit-rtl.h"
128 #ifdef DELAY_SLOTS
130 #ifndef ANNUL_IFTRUE_SLOTS
131 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
132 #endif
133 #ifndef ANNUL_IFFALSE_SLOTS
134 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
135 #endif
137 /* Insns which have delay slots that have not yet been filled. */
139 static struct obstack unfilled_slots_obstack;
140 static rtx *unfilled_firstobj;
142 /* Define macros to refer to the first and last slot containing unfilled
143 insns. These are used because the list may move and its address
144 should be recomputed at each use. */
146 #define unfilled_slots_base \
147 ((rtx *) obstack_base (&unfilled_slots_obstack))
149 #define unfilled_slots_next \
150 ((rtx *) obstack_next_free (&unfilled_slots_obstack))
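
/* Editor's sketch (not part of the original file): since the obstack may
   grow and move, the unfilled-slot array is always walked through the two
   macros above, recomputing base and end on every use.  Roughly:  */
#if 0
  rtx *slot;
  for (slot = unfilled_slots_base; slot < unfilled_slots_next; slot++)
    if (*slot != 0 && !INSN_DELETED_P (*slot))
      handle_unfilled_insn (*slot);   /* hypothetical helper */
#endif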
152 /* Points to the label before the end of the function, or before a
153 return insn. */
154 static rtx function_return_label;
155 /* Likewise for a simple_return. */
156 static rtx function_simple_return_label;
158 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
159 not always monotonically increase. */
160 static int *uid_to_ruid;
162 /* Highest valid index in `uid_to_ruid'. */
163 static int max_uid;
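
/* Editor's sketch (not part of the original file): the mapping is built once
   at the start of the pass by numbering insns in chain order, so relative
   position can be compared even though INSN_UIDs need not be monotonic.  */
#if 0
  int i;
  rtx insn;
  uid_to_ruid = XNEWVEC (int, max_uid + 1);
  for (i = 0, insn = get_insns (); insn; i++, insn = NEXT_INSN (insn))
    uid_to_ruid[INSN_UID (insn)] = i;
#endif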
165 static int stop_search_p (rtx, int);
166 static int resource_conflicts_p (struct resources *, struct resources *);
167 static int insn_references_resource_p (rtx, struct resources *, bool);
168 static int insn_sets_resource_p (rtx, struct resources *, bool);
169 static rtx find_end_label (rtx);
170 static rtx emit_delay_sequence (rtx, rtx, int);
171 static rtx add_to_delay_list (rtx, rtx);
172 static rtx delete_from_delay_slot (rtx);
173 static void delete_scheduled_jump (rtx);
174 static void note_delay_statistics (int, int);
175 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
176 static rtx optimize_skip (rtx);
177 #endif
178 static int get_jump_flags (rtx, rtx);
179 static int mostly_true_jump (rtx);
180 static rtx get_branch_condition (rtx, rtx);
181 static int condition_dominates_p (rtx, rtx);
182 static int redirect_with_delay_slots_safe_p (rtx, rtx, rtx);
183 static int redirect_with_delay_list_safe_p (rtx, rtx, rtx);
184 static int check_annul_list_true_false (int, rtx);
185 static rtx steal_delay_list_from_target (rtx, rtx, rtx, rtx,
186 struct resources *,
187 struct resources *,
188 struct resources *,
189 int, int *, int *, rtx *);
190 static rtx steal_delay_list_from_fallthrough (rtx, rtx, rtx, rtx,
191 struct resources *,
192 struct resources *,
193 struct resources *,
194 int, int *, int *);
195 static void try_merge_delay_insns (rtx, rtx);
196 static rtx redundant_insn (rtx, rtx, rtx);
197 static int own_thread_p (rtx, rtx, int);
198 static void update_block (rtx, rtx);
199 static int reorg_redirect_jump (rtx, rtx);
200 static void update_reg_dead_notes (rtx, rtx);
201 static void fix_reg_dead_note (rtx, rtx);
202 static void update_reg_unused_notes (rtx, rtx);
203 static void fill_simple_delay_slots (int);
204 static rtx fill_slots_from_thread (rtx, rtx, rtx, rtx,
205 int, int, int, int,
206 int *, rtx);
207 static void fill_eager_delay_slots (void);
208 static void relax_delay_slots (rtx);
209 static void make_return_insns (rtx);
211 /* A wrapper around next_active_insn which takes care to return ret_rtx
212 unchanged. */
214 static rtx
215 first_active_target_insn (rtx insn)
217 if (ANY_RETURN_P (insn))
218 return insn;
219 return next_active_insn (insn);
222 /* Return true iff INSN is a simplejump, or any kind of return insn. */
224 static bool
225 simplejump_or_return_p (rtx insn)
227 return (JUMP_P (insn)
228 && (simplejump_p (insn) || ANY_RETURN_P (PATTERN (insn))));
231 /* Return TRUE if this insn should stop the search for insns to fill delay
232 slots. LABELS_P indicates that labels should terminate the search.
233 In all cases, jumps terminate the search. */
235 static int
236 stop_search_p (rtx insn, int labels_p)
238 if (insn == 0)
239 return 1;
241 /* If the insn can throw an exception that is caught within the function,
242 it may effectively perform a jump from the viewpoint of the function.
243 Therefore treat it like a jump.  */
244 if (can_throw_internal (insn))
245 return 1;
247 switch (GET_CODE (insn))
249 case NOTE:
250 case CALL_INSN:
251 return 0;
253 case CODE_LABEL:
254 return labels_p;
256 case JUMP_INSN:
257 case BARRIER:
258 return 1;
260 case INSN:
261 /* OK unless it contains a delay slot or is an `asm' insn of some type.
262 We don't know anything about these. */
263 return (GET_CODE (PATTERN (insn)) == SEQUENCE
264 || GET_CODE (PATTERN (insn)) == ASM_INPUT
265 || asm_noperands (PATTERN (insn)) >= 0);
267 default:
268 gcc_unreachable ();
272 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
273 resource set contains a volatile memory reference. Otherwise, return FALSE. */
275 static int
276 resource_conflicts_p (struct resources *res1, struct resources *res2)
278 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
279 || (res1->unch_memory && res2->unch_memory)
280 || res1->volatil || res2->volatil)
281 return 1;
283 return hard_reg_set_intersect_p (res1->regs, res2->regs);
286 /* Return TRUE if any resource marked in RES, a `struct resources', is
287 referenced by INSN.  If INCLUDE_DELAYED_EFFECTS is set, resources used by
288 the routine called by INSN are taken into account as well.
290 We compute this by computing all the resources referenced by INSN and
291 seeing if this conflicts with RES. It might be faster to directly check
292 ourselves, and this is the way it used to work, but it means duplicating
293 a large block of complex code. */
295 static int
296 insn_references_resource_p (rtx insn, struct resources *res,
297 bool include_delayed_effects)
299 struct resources insn_res;
301 CLEAR_RESOURCE (&insn_res);
302 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
303 return resource_conflicts_p (&insn_res, res);
306 /* Return TRUE if INSN modifies resources that are marked in RES.
307 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
308 included. CC0 is only modified if it is explicitly set; see comments
309 in front of mark_set_resources for details. */
311 static int
312 insn_sets_resource_p (rtx insn, struct resources *res,
313 bool include_delayed_effects)
315 struct resources insn_sets;
317 CLEAR_RESOURCE (&insn_sets);
318 mark_set_resources (insn, &insn_sets, 0,
319 (include_delayed_effects
320 ? MARK_SRC_DEST_CALL
321 : MARK_SRC_DEST));
322 return resource_conflicts_p (&insn_sets, res);
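
/* Editor's sketch (not part of the original file): the usual pattern in this
   pass is to collect one insn's resources and then ask whether a candidate
   conflicts with them, for example:  */
#if 0
  struct resources needed;
  CLEAR_RESOURCE (&needed);
  mark_referenced_resources (insn, &needed, true);
  if (insn_sets_resource_p (trial, &needed, true))
    ;  /* TRIAL clobbers something INSN still needs, so reject it.  */
#endif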
325 /* Find a label at the end of the function or before a RETURN. If there
326 is none, try to make one.  If that fails, return 0.
328 The property of such a label is that it is placed just before the
329 epilogue or a bare RETURN insn, so that another bare RETURN can be
330 turned into a jump to the label unconditionally. In particular, the
331 label cannot be placed before a RETURN insn with a filled delay slot.
333 ??? There may be a problem with the current implementation. Suppose
334 we start with a bare RETURN insn and call find_end_label. It may set
335 function_return_label just before the RETURN. Suppose the machinery
336 is able to fill the delay slot of the RETURN insn afterwards. Then
337 function_return_label is no longer valid according to the property
338 described above and find_end_label will still return it unmodified.
339 Note that this is probably mitigated by the following observation:
340 once function_return_label is made, it is very likely the target of
341 a jump, so filling the delay slot of the RETURN will be much more
342 difficult.
343 KIND is either simple_return_rtx or ret_rtx, indicating which type of
344 return we're looking for. */
346 static rtx
347 find_end_label (rtx kind)
349 rtx insn;
350 rtx *plabel;
352 if (kind == ret_rtx)
353 plabel = &function_return_label;
354 else
356 gcc_assert (kind == simple_return_rtx);
357 plabel = &function_simple_return_label;
360 /* If we found one previously, return it. */
361 if (*plabel)
362 return *plabel;
364 /* Otherwise, see if there is a label at the end of the function. If there
365 is, it must be that RETURN insns aren't needed, so that is our return
366 label and we don't have to do anything else. */
368 insn = get_last_insn ();
369 while (NOTE_P (insn)
370 || (NONJUMP_INSN_P (insn)
371 && (GET_CODE (PATTERN (insn)) == USE
372 || GET_CODE (PATTERN (insn)) == CLOBBER)))
373 insn = PREV_INSN (insn);
375 /* When a target threads its epilogue we might already have a
376 suitable return insn.  If so, put a label before it for the
377 function_return_label. */
378 if (BARRIER_P (insn)
379 && JUMP_P (PREV_INSN (insn))
380 && PATTERN (PREV_INSN (insn)) == kind)
382 rtx temp = PREV_INSN (PREV_INSN (insn));
383 rtx label = gen_label_rtx ();
384 LABEL_NUSES (label) = 0;
386 /* Put the label before any USE insns that may precede the RETURN
387 insn. */
388 while (GET_CODE (temp) == USE)
389 temp = PREV_INSN (temp);
391 emit_label_after (label, temp);
392 *plabel = label;
395 else if (LABEL_P (insn))
396 *plabel = insn;
397 else
399 rtx label = gen_label_rtx ();
400 LABEL_NUSES (label) = 0;
401 /* If the basic block reorder pass moves the return insn to
402 some other place try to locate it again and put our
403 function_return_label there. */
404 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
405 insn = PREV_INSN (insn);
406 if (insn)
408 insn = PREV_INSN (insn);
410 /* Put the label before any USE insns that may precede the
411 RETURN insn. */
412 while (GET_CODE (insn) == USE)
413 insn = PREV_INSN (insn);
415 emit_label_after (label, insn);
417 else
419 #ifdef HAVE_epilogue
420 if (HAVE_epilogue
421 #ifdef HAVE_return
422 && ! HAVE_return
423 #endif
425 /* The RETURN insn has its delay slot filled so we cannot
426 emit the label just before it. Since we already have
427 an epilogue and cannot emit a new RETURN, we cannot
428 emit the label at all. */
429 return NULL_RTX;
430 #endif /* HAVE_epilogue */
432 /* Otherwise, make a new label and emit a RETURN and BARRIER,
433 if needed. */
434 emit_label (label);
435 #ifdef HAVE_return
436 if (HAVE_return)
438 /* The return we make may have delay slots too. */
439 rtx insn = gen_return ();
440 insn = emit_jump_insn (insn);
441 set_return_jump_label (insn);
442 emit_barrier ();
443 if (num_delay_slots (insn) > 0)
444 obstack_ptr_grow (&unfilled_slots_obstack, insn);
446 #endif
448 *plabel = label;
451 /* Show one additional use for this label so it won't go away until
452 we are done. */
453 ++LABEL_NUSES (*plabel);
455 return *plabel;
458 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
459 the pattern of INSN with the SEQUENCE.
461 Returns the SEQUENCE that replaces INSN. */
463 static rtx
464 emit_delay_sequence (rtx insn, rtx list, int length)
466 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
467 rtvec seqv = rtvec_alloc (length + 1);
468 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
469 rtx seq_insn = make_insn_raw (seq);
471 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
472 not have a location, but one of the delayed insns does, we pick up a
473 location from there later. */
474 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
476 /* Unlink INSN from the insn chain, so that we can put it into
477 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
478 rtx after = PREV_INSN (insn);
479 remove_insn (insn);
480 NEXT_INSN (insn) = PREV_INSN (insn) = NULL;
482 /* Build our SEQUENCE and rebuild the insn chain. */
483 int i = 1;
484 start_sequence ();
485 XVECEXP (seq, 0, 0) = emit_insn (insn);
486 for (rtx li = list; li; li = XEXP (li, 1), i++)
488 rtx tem = XEXP (li, 0);
489 rtx note, next;
491 /* Show that this copy of the insn isn't deleted. */
492 INSN_DELETED_P (tem) = 0;
494 /* Unlink insn from its original place, and re-emit it into
495 the sequence. */
496 NEXT_INSN (tem) = PREV_INSN (tem) = NULL;
497 XVECEXP (seq, 0, i) = emit_insn (tem);
499 /* The SPARC assembler, for instance, emits a warning when debug info is output
500 into the delay slot. */
501 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
502 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
503 INSN_LOCATION (tem) = 0;
505 for (note = REG_NOTES (tem); note; note = next)
507 next = XEXP (note, 1);
508 switch (REG_NOTE_KIND (note))
510 case REG_DEAD:
511 /* Remove any REG_DEAD notes because we can't rely on them now
512 that the insn has been moved. */
513 remove_note (tem, note);
514 break;
516 case REG_LABEL_OPERAND:
517 case REG_LABEL_TARGET:
518 /* Keep the label reference count up to date. */
519 if (LABEL_P (XEXP (note, 0)))
520 LABEL_NUSES (XEXP (note, 0)) ++;
521 break;
523 default:
524 break;
528 end_sequence ();
529 gcc_assert (i == length + 1);
531 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
532 add_insn_after (seq_insn, after, NULL);
534 return seq_insn;
537 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
538 be in the order in which the insns are to be executed. */
540 static rtx
541 add_to_delay_list (rtx insn, rtx delay_list)
543 /* If we have an empty list, just make a new list element. If
544 INSN has its block number recorded, clear it since we may
545 be moving the insn to a new block. */
547 if (delay_list == 0)
549 clear_hashed_info_for_insn (insn);
550 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
553 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
554 list. */
555 XEXP (delay_list, 1) = add_to_delay_list (insn, XEXP (delay_list, 1));
557 return delay_list;
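
/* Editor's sketch (not part of the original file): the list is kept in
   execution order, so appending hypothetical insns A then B yields
   (insn_list A (insn_list B nil)), with A executing first.  */
#if 0
  rtx delay_list = NULL_RTX;
  delay_list = add_to_delay_list (insn_a, delay_list);   /* executes first */
  delay_list = add_to_delay_list (insn_b, delay_list);   /* executes second */
#endif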
560 /* Delete INSN from the delay slot of the insn that it is in, which may
561 produce an insn with no delay slots. Return the new insn. */
563 static rtx
564 delete_from_delay_slot (rtx insn)
566 rtx trial, seq_insn, seq, prev;
567 rtx delay_list = 0;
568 int i;
569 int had_barrier = 0;
571 /* We first must find the insn containing the SEQUENCE with INSN in its
572 delay slot. Do this by finding an insn, TRIAL, where
573 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
575 for (trial = insn;
576 PREV_INSN (NEXT_INSN (trial)) == trial;
577 trial = NEXT_INSN (trial))
580 seq_insn = PREV_INSN (NEXT_INSN (trial));
581 seq = PATTERN (seq_insn);
583 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
584 had_barrier = 1;
586 /* Create a delay list consisting of all the insns other than the one
587 we are deleting (unless we were the only one). */
588 if (XVECLEN (seq, 0) > 2)
589 for (i = 1; i < XVECLEN (seq, 0); i++)
590 if (XVECEXP (seq, 0, i) != insn)
591 delay_list = add_to_delay_list (XVECEXP (seq, 0, i), delay_list);
593 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
594 list, and rebuild the delay list if non-empty. */
595 prev = PREV_INSN (seq_insn);
596 trial = XVECEXP (seq, 0, 0);
597 delete_related_insns (seq_insn);
598 add_insn_after (trial, prev, NULL);
600 /* If there was a barrier after the old SEQUENCE, re-emit it. */
601 if (had_barrier)
602 emit_barrier_after (trial);
604 /* If there are any delay insns, re-emit them. Otherwise clear the
605 annul flag. */
606 if (delay_list)
607 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
608 else if (JUMP_P (trial))
609 INSN_ANNULLED_BRANCH_P (trial) = 0;
611 INSN_FROM_TARGET_P (insn) = 0;
613 /* Show we need to fill this insn again. */
614 obstack_ptr_grow (&unfilled_slots_obstack, trial);
616 return trial;
619 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
620 the insn that sets CC0 for it and delete it too. */
622 static void
623 delete_scheduled_jump (rtx insn)
625 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
626 delete the insn that sets the condition code, but it is hard to find it.
627 Since this case is rare anyway, don't bother trying; there would likely
628 be other insns that became dead anyway, which we wouldn't know to
629 delete. */
631 #ifdef HAVE_cc0
632 if (reg_mentioned_p (cc0_rtx, insn))
634 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
636 /* If a reg-note was found, it points to an insn to set CC0. This
637 insn is in the delay list of some other insn. So delete it from
638 the delay list it was in. */
639 if (note)
641 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
642 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
643 delete_from_delay_slot (XEXP (note, 0));
645 else
647 /* The insn setting CC0 is our previous insn, but it may be in
648 a delay slot. It will be the last insn in the delay slot, if
649 it is. */
650 rtx trial = previous_insn (insn);
651 if (NOTE_P (trial))
652 trial = prev_nonnote_insn (trial);
653 if (sets_cc0_p (PATTERN (trial)) != 1
654 || FIND_REG_INC_NOTE (trial, NULL_RTX))
655 return;
656 if (PREV_INSN (NEXT_INSN (trial)) == trial)
657 delete_related_insns (trial);
658 else
659 delete_from_delay_slot (trial);
662 #endif
664 delete_related_insns (insn);
667 /* Counters for delay-slot filling. */
669 #define NUM_REORG_FUNCTIONS 2
670 #define MAX_DELAY_HISTOGRAM 3
671 #define MAX_REORG_PASSES 2
673 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
675 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
677 static int reorg_pass_number;
679 static void
680 note_delay_statistics (int slots_filled, int index)
682 num_insns_needing_delays[index][reorg_pass_number]++;
683 if (slots_filled > MAX_DELAY_HISTOGRAM)
684 slots_filled = MAX_DELAY_HISTOGRAM;
685 num_filled_delays[index][slots_filled][reorg_pass_number]++;
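
/* Editor's note (worked example, not part of the original file): with
   MAX_DELAY_HISTOGRAM == 3, an insn that had five slots filled is clamped
   into the "3 or more" bucket, i.e.
   num_filled_delays[index][3][reorg_pass_number] is incremented.  */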
688 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
690 /* Optimize the following cases:
692 1. When a conditional branch skips over only one instruction,
693 use an annulling branch and put that insn in the delay slot.
694 Use either a branch that annuls when the condition is true or
695 invert the test with a branch that annuls when the condition is
696 false. This saves insns, since otherwise we must copy an insn
697 from the L1 target.
699 (orig) (skip) (otherwise)
700 Bcc.n L1 Bcc',a L1 Bcc,a L1'
701 insn insn insn2
702 L1: L1: L1:
703 insn2 insn2 insn2
704 insn3 insn3 L1':
705 insn3
707 2. When a conditional branch skips over only one instruction,
708 and after that, it unconditionally branches somewhere else,
709 perform a similar optimization. This saves executing the
710 second branch in the case where the inverted condition is true.
712 Bcc.n L1 Bcc',a L2
713 insn insn
714 L1: L1:
715 Bra L2 Bra L2
717 INSN is a JUMP_INSN.
719 This should be expanded to skip over N insns, where N is the number
720 of delay slots required. */
722 static rtx
723 optimize_skip (rtx insn)
725 rtx trial = next_nonnote_insn (insn);
726 rtx next_trial = next_active_insn (trial);
727 rtx delay_list = 0;
728 int flags;
730 flags = get_jump_flags (insn, JUMP_LABEL (insn));
732 if (trial == 0
733 || !NONJUMP_INSN_P (trial)
734 || GET_CODE (PATTERN (trial)) == SEQUENCE
735 || recog_memoized (trial) < 0
736 || (! eligible_for_annul_false (insn, 0, trial, flags)
737 && ! eligible_for_annul_true (insn, 0, trial, flags))
738 || can_throw_internal (trial))
739 return 0;
741 /* There are two cases where we are just executing one insn (we assume
742 here that a branch requires only one insn; this should be generalized
743 at some point): Where the branch goes around a single insn or where
744 we have one insn followed by a branch to the same label we branch to.
745 In both of these cases, inverting the jump and annulling the delay
746 slot give the same effect in fewer insns. */
747 if (next_trial == next_active_insn (JUMP_LABEL (insn))
748 || (next_trial != 0
749 && simplejump_or_return_p (next_trial)
750 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
752 if (eligible_for_annul_false (insn, 0, trial, flags))
754 if (invert_jump (insn, JUMP_LABEL (insn), 1))
755 INSN_FROM_TARGET_P (trial) = 1;
756 else if (! eligible_for_annul_true (insn, 0, trial, flags))
757 return 0;
760 delay_list = add_to_delay_list (trial, NULL_RTX);
761 next_trial = next_active_insn (trial);
762 update_block (trial, trial);
763 delete_related_insns (trial);
765 /* Also, if we are targeting an unconditional
766 branch, thread our jump to the target of that branch. Don't
767 change this into a RETURN here, because it may not accept what
768 we have in the delay slot. We'll fix this up later. */
769 if (next_trial && simplejump_or_return_p (next_trial))
771 rtx target_label = JUMP_LABEL (next_trial);
772 if (ANY_RETURN_P (target_label))
773 target_label = find_end_label (target_label);
775 if (target_label)
777 /* Recompute the flags based on TARGET_LABEL since threading
778 the jump to TARGET_LABEL may change the direction of the
779 jump (which may change the circumstances in which the
780 delay slot is nullified). */
781 flags = get_jump_flags (insn, target_label);
782 if (eligible_for_annul_true (insn, 0, trial, flags))
783 reorg_redirect_jump (insn, target_label);
787 INSN_ANNULLED_BRANCH_P (insn) = 1;
790 return delay_list;
792 #endif
794 /* Encode and return branch direction and prediction information for
795 INSN assuming it will jump to LABEL.
797 Non-conditional branches return no direction information and
798 are predicted as very likely taken. */
800 static int
801 get_jump_flags (rtx insn, rtx label)
803 int flags;
805 /* get_jump_flags can be passed any insn with delay slots; these may
806 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
807 direction information, and only if they are conditional jumps.
809 If LABEL is a return, then there is no way to determine the branch
810 direction. */
811 if (JUMP_P (insn)
812 && (condjump_p (insn) || condjump_in_parallel_p (insn))
813 && !ANY_RETURN_P (label)
814 && INSN_UID (insn) <= max_uid
815 && INSN_UID (label) <= max_uid)
816 flags
817 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
818 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
819 /* No valid direction information. */
820 else
821 flags = 0;
823 return flags;
826 /* Return truth value of the statement that this branch
827 is mostly taken. If we think that the branch is extremely likely
828 to be taken, we return 2. If the branch is slightly more likely to be
829 taken, return 1. If the branch is slightly less likely to be taken,
830 return 0 and if the branch is highly unlikely to be taken, return -1. */
832 static int
833 mostly_true_jump (rtx jump_insn)
835 /* If branch probabilities are available, then use that number since it
836 always gives a correct answer. */
837 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
838 if (note)
840 int prob = INTVAL (XEXP (note, 0));
842 if (prob >= REG_BR_PROB_BASE * 9 / 10)
843 return 2;
844 else if (prob >= REG_BR_PROB_BASE / 2)
845 return 1;
846 else if (prob >= REG_BR_PROB_BASE / 10)
847 return 0;
848 else
849 return -1;
852 /* If there is no note, assume branches are not taken.
853 This should be rare. */
854 return 0;
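
/* Editor's note (worked example, not part of the original file): assuming
   the usual REG_BR_PROB_BASE of 10000, a REG_BR_PROB note of 9500 returns 2
   (>= 9/10 of the base), 6000 returns 1 (>= 1/2), 2000 returns 0 (>= 1/10),
   and 500 returns -1.  */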
857 /* Return the condition under which INSN will branch to TARGET. If TARGET
858 is zero, return the condition under which INSN will return. If INSN is
859 an unconditional branch, return const_true_rtx. If INSN isn't a simple
860 type of jump, or it doesn't go to TARGET, return 0. */
862 static rtx
863 get_branch_condition (rtx insn, rtx target)
865 rtx pat = PATTERN (insn);
866 rtx src;
868 if (condjump_in_parallel_p (insn))
869 pat = XVECEXP (pat, 0, 0);
871 if (ANY_RETURN_P (pat) && pat == target)
872 return const_true_rtx;
874 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
875 return 0;
877 src = SET_SRC (pat);
878 if (GET_CODE (src) == LABEL_REF && XEXP (src, 0) == target)
879 return const_true_rtx;
881 else if (GET_CODE (src) == IF_THEN_ELSE
882 && XEXP (src, 2) == pc_rtx
883 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
884 && XEXP (XEXP (src, 1), 0) == target)
885 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
886 return XEXP (src, 0);
888 else if (GET_CODE (src) == IF_THEN_ELSE
889 && XEXP (src, 1) == pc_rtx
890 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
891 && XEXP (XEXP (src, 2), 0) == target)
892 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
894 enum rtx_code rev;
895 rev = reversed_comparison_code (XEXP (src, 0), insn);
896 if (rev != UNKNOWN)
897 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
898 XEXP (XEXP (src, 0), 0),
899 XEXP (XEXP (src, 0), 1));
902 return 0;
905 /* Return nonzero if CONDITION is more strict than the condition of
906 INSN, i.e., if INSN will always branch if CONDITION is true. */
908 static int
909 condition_dominates_p (rtx condition, rtx insn)
911 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
912 enum rtx_code code = GET_CODE (condition);
913 enum rtx_code other_code;
915 if (rtx_equal_p (condition, other_condition)
916 || other_condition == const_true_rtx)
917 return 1;
919 else if (condition == const_true_rtx || other_condition == 0)
920 return 0;
922 other_code = GET_CODE (other_condition);
923 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
924 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
925 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
926 return 0;
928 return comparison_dominates_p (code, other_code);
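
/* Editor's note (example, not part of the original file): per the definition
   above, CONDITION (eq x y) dominates a branch taken on (ge x y), since
   equality of the same operands implies greater-or-equal; after the operands
   are checked for equality, comparison_dominates_p is what recognizes such
   implications between the two comparison codes.  */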
931 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
932 any insns already in the delay slot of JUMP. */
934 static int
935 redirect_with_delay_slots_safe_p (rtx jump, rtx newlabel, rtx seq)
937 int flags, i;
938 rtx pat = PATTERN (seq);
940 /* Make sure all the delay slots of this jump would still
941 be valid after threading the jump. If they are still
942 valid, then return nonzero. */
944 flags = get_jump_flags (jump, newlabel);
945 for (i = 1; i < XVECLEN (pat, 0); i++)
946 if (! (
947 #ifdef ANNUL_IFFALSE_SLOTS
948 (INSN_ANNULLED_BRANCH_P (jump)
949 && INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
950 ? eligible_for_annul_false (jump, i - 1,
951 XVECEXP (pat, 0, i), flags) :
952 #endif
953 #ifdef ANNUL_IFTRUE_SLOTS
954 (INSN_ANNULLED_BRANCH_P (jump)
955 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
956 ? eligible_for_annul_true (jump, i - 1,
957 XVECEXP (pat, 0, i), flags) :
958 #endif
959 eligible_for_delay (jump, i - 1, XVECEXP (pat, 0, i), flags)))
960 break;
962 return (i == XVECLEN (pat, 0));
965 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
966 any insns we wish to place in the delay slot of JUMP. */
968 static int
969 redirect_with_delay_list_safe_p (rtx jump, rtx newlabel, rtx delay_list)
971 int flags, i;
972 rtx li;
974 /* Make sure all the insns in DELAY_LIST would still be
975 valid after threading the jump. If they are still
976 valid, then return nonzero. */
978 flags = get_jump_flags (jump, newlabel);
979 for (li = delay_list, i = 0; li; li = XEXP (li, 1), i++)
980 if (! (
981 #ifdef ANNUL_IFFALSE_SLOTS
982 (INSN_ANNULLED_BRANCH_P (jump)
983 && INSN_FROM_TARGET_P (XEXP (li, 0)))
984 ? eligible_for_annul_false (jump, i, XEXP (li, 0), flags) :
985 #endif
986 #ifdef ANNUL_IFTRUE_SLOTS
987 (INSN_ANNULLED_BRANCH_P (jump)
988 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
989 ? eligible_for_annul_true (jump, i, XEXP (li, 0), flags) :
990 #endif
991 eligible_for_delay (jump, i, XEXP (li, 0), flags)))
992 break;
994 return (li == NULL);
997 /* DELAY_LIST is a list of insns that have already been placed into delay
998 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
999 If not, return 0; otherwise return 1. */
1001 static int
1002 check_annul_list_true_false (int annul_true_p, rtx delay_list)
1004 rtx temp;
1006 if (delay_list)
1008 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1010 rtx trial = XEXP (temp, 0);
1012 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1013 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1014 return 0;
1018 return 1;
1021 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1022 the condition tested by INSN is CONDITION and the resources shown in
1023 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1024 from SEQ's delay list, in addition to whatever insns it may execute
1025 (in DELAY_LIST).  SETS and NEEDED denote resources already set and
1026 needed while searching for delay slot insns. Return the concatenated
1027 delay list if possible, otherwise, return 0.
1029 SLOTS_TO_FILL is the total number of slots required by INSN, and
1030 PSLOTS_FILLED points to the number filled so far (also the number of
1031 insns in DELAY_LIST). It is updated with the number that have been
1032 filled from the SEQUENCE, if any.
1034 PANNUL_P points to a nonzero value if we already know that we need
1035 to annul INSN. If this routine determines that annulling is needed,
1036 it may set that value nonzero.
1038 PNEW_THREAD points to a location that is to receive the place at which
1039 execution should continue. */
1041 static rtx
1042 steal_delay_list_from_target (rtx insn, rtx condition, rtx seq,
1043 rtx delay_list, struct resources *sets,
1044 struct resources *needed,
1045 struct resources *other_needed,
1046 int slots_to_fill, int *pslots_filled,
1047 int *pannul_p, rtx *pnew_thread)
1049 rtx temp;
1050 int slots_remaining = slots_to_fill - *pslots_filled;
1051 int total_slots_filled = *pslots_filled;
1052 rtx new_delay_list = 0;
1053 int must_annul = *pannul_p;
1054 int used_annul = 0;
1055 int i;
1056 struct resources cc_set;
1058 /* We can't do anything if there are more delay slots in SEQ than we
1059 can handle, or if we don't know that it will be a taken branch.
1060 We know that it will be a taken branch if it is either an unconditional
1061 branch or a conditional branch with a stricter branch condition.
1063 Also, exit if the branch has more than one set, since then it is computing
1064 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1065 ??? It may be possible to move other sets into INSN in addition to
1066 moving the instructions in the delay slots.
1068 We cannot steal the delay list if one of the instructions in the
1069 current delay_list modifies the condition codes and the jump in the
1070 sequence is a conditional jump.  In that case we cannot change the
1071 direction of the jump, because the condition codes would affect the
1072 direction of the jump in the sequence. */
1074 CLEAR_RESOURCE (&cc_set);
1075 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1077 rtx trial = XEXP (temp, 0);
1079 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1080 if (insn_references_resource_p (XVECEXP (seq , 0, 0), &cc_set, false))
1081 return delay_list;
1084 if (XVECLEN (seq, 0) - 1 > slots_remaining
1085 || ! condition_dominates_p (condition, XVECEXP (seq, 0, 0))
1086 || ! single_set (XVECEXP (seq, 0, 0)))
1087 return delay_list;
1089 #ifdef MD_CAN_REDIRECT_BRANCH
1090 /* On some targets, branches with delay slots can have a limited
1091 displacement. Give the back end a chance to tell us we can't do
1092 this. */
1093 if (! MD_CAN_REDIRECT_BRANCH (insn, XVECEXP (seq, 0, 0)))
1094 return delay_list;
1095 #endif
1097 for (i = 1; i < XVECLEN (seq, 0); i++)
1099 rtx trial = XVECEXP (seq, 0, i);
1100 int flags;
1102 if (insn_references_resource_p (trial, sets, false)
1103 || insn_sets_resource_p (trial, needed, false)
1104 || insn_sets_resource_p (trial, sets, false)
1105 #ifdef HAVE_cc0
1106 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1107 delay list. */
1108 || find_reg_note (trial, REG_CC_USER, NULL_RTX)
1109 #endif
1110 /* If TRIAL is from the fallthrough code of an annulled branch insn
1111 in SEQ, we cannot use it. */
1112 || (INSN_ANNULLED_BRANCH_P (XVECEXP (seq, 0, 0))
1113 && ! INSN_FROM_TARGET_P (trial)))
1114 return delay_list;
1116 /* If this insn was already done (usually in a previous delay slot),
1117 pretend we put it in our delay slot. */
1118 if (redundant_insn (trial, insn, new_delay_list))
1119 continue;
1121 /* We will end up re-vectoring this branch, so compute flags
1122 based on jumping to the new label. */
1123 flags = get_jump_flags (insn, JUMP_LABEL (XVECEXP (seq, 0, 0)));
1125 if (! must_annul
1126 && ((condition == const_true_rtx
1127 || (! insn_sets_resource_p (trial, other_needed, false)
1128 && ! may_trap_or_fault_p (PATTERN (trial)))))
1129 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1130 : (must_annul || (delay_list == NULL && new_delay_list == NULL))
1131 && (must_annul = 1,
1132 check_annul_list_true_false (0, delay_list)
1133 && check_annul_list_true_false (0, new_delay_list)
1134 && eligible_for_annul_false (insn, total_slots_filled,
1135 trial, flags)))
1137 if (must_annul)
1138 used_annul = 1;
1139 temp = copy_delay_slot_insn (trial);
1140 INSN_FROM_TARGET_P (temp) = 1;
1141 new_delay_list = add_to_delay_list (temp, new_delay_list);
1142 total_slots_filled++;
1144 if (--slots_remaining == 0)
1145 break;
1147 else
1148 return delay_list;
1151 /* Show the place to which we will be branching. */
1152 *pnew_thread = first_active_target_insn (JUMP_LABEL (XVECEXP (seq, 0, 0)));
1154 /* Add any new insns to the delay list and update the count of the
1155 number of slots filled. */
1156 *pslots_filled = total_slots_filled;
1157 if (used_annul)
1158 *pannul_p = 1;
1160 if (delay_list == 0)
1161 return new_delay_list;
1163 for (temp = new_delay_list; temp; temp = XEXP (temp, 1))
1164 delay_list = add_to_delay_list (XEXP (temp, 0), delay_list);
1166 return delay_list;
1169 /* Similar to steal_delay_list_from_target except that SEQ is on the
1170 fallthrough path of INSN. Here we only do something if the delay insn
1171 of SEQ is an unconditional branch. In that case we steal its delay slot
1172 for INSN since unconditional branches are much easier to fill. */
1174 static rtx
1175 steal_delay_list_from_fallthrough (rtx insn, rtx condition, rtx seq,
1176 rtx delay_list, struct resources *sets,
1177 struct resources *needed,
1178 struct resources *other_needed,
1179 int slots_to_fill, int *pslots_filled,
1180 int *pannul_p)
1182 int i;
1183 int flags;
1184 int must_annul = *pannul_p;
1185 int used_annul = 0;
1187 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1189 /* We can't do anything if SEQ's delay insn isn't an
1190 unconditional branch. */
1192 if (! simplejump_or_return_p (XVECEXP (seq, 0, 0)))
1193 return delay_list;
1195 for (i = 1; i < XVECLEN (seq, 0); i++)
1197 rtx trial = XVECEXP (seq, 0, i);
1199 /* If TRIAL sets CC0, stealing it will move it too far from the use
1200 of CC0. */
1201 if (insn_references_resource_p (trial, sets, false)
1202 || insn_sets_resource_p (trial, needed, false)
1203 || insn_sets_resource_p (trial, sets, false)
1204 #ifdef HAVE_cc0
1205 || sets_cc0_p (PATTERN (trial))
1206 #endif
1209 break;
1211 /* If this insn was already done, we don't need it. */
1212 if (redundant_insn (trial, insn, delay_list))
1214 delete_from_delay_slot (trial);
1215 continue;
1218 if (! must_annul
1219 && ((condition == const_true_rtx
1220 || (! insn_sets_resource_p (trial, other_needed, false)
1221 && ! may_trap_or_fault_p (PATTERN (trial)))))
1222 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1223 : (must_annul || delay_list == NULL) && (must_annul = 1,
1224 check_annul_list_true_false (1, delay_list)
1225 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1227 if (must_annul)
1228 used_annul = 1;
1229 delete_from_delay_slot (trial);
1230 delay_list = add_to_delay_list (trial, delay_list);
1232 if (++(*pslots_filled) == slots_to_fill)
1233 break;
1235 else
1236 break;
1239 if (used_annul)
1240 *pannul_p = 1;
1241 return delay_list;
1244 /* Try merging insns starting at THREAD which match exactly the insns in
1245 INSN's delay list.
1247 If all insns were matched and the insn was previously annulling, the
1248 annul bit will be cleared.
1250 For each insn that is merged, if the branch is or will be non-annulling,
1251 we delete the merged insn. */
1253 static void
1254 try_merge_delay_insns (rtx insn, rtx thread)
1256 rtx trial, next_trial;
1257 rtx delay_insn = XVECEXP (PATTERN (insn), 0, 0);
1258 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1259 int slot_number = 1;
1260 int num_slots = XVECLEN (PATTERN (insn), 0);
1261 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1262 struct resources set, needed;
1263 rtx merged_insns = 0;
1264 int i;
1265 int flags;
1267 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1269 CLEAR_RESOURCE (&needed);
1270 CLEAR_RESOURCE (&set);
1272 /* If this is not an annulling branch, take into account anything needed in
1273 INSN's delay slot. This prevents two increments from being incorrectly
1274 folded into one. If we are annulling, this would be the correct
1275 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1276 will essentially disable this optimization. This method is somewhat of
1277 a kludge, but I don't see a better way.) */
1278 if (! annul_p)
1279 for (i = 1 ; i < num_slots; i++)
1280 if (XVECEXP (PATTERN (insn), 0, i))
1281 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1282 true);
1284 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1286 rtx pat = PATTERN (trial);
1287 rtx oldtrial = trial;
1289 next_trial = next_nonnote_insn (trial);
1291 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1292 if (NONJUMP_INSN_P (trial)
1293 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1294 continue;
1296 if (GET_CODE (next_to_match) == GET_CODE (trial)
1297 #ifdef HAVE_cc0
1298 /* We can't share an insn that sets cc0. */
1299 && ! sets_cc0_p (pat)
1300 #endif
1301 && ! insn_references_resource_p (trial, &set, true)
1302 && ! insn_sets_resource_p (trial, &set, true)
1303 && ! insn_sets_resource_p (trial, &needed, true)
1304 && (trial = try_split (pat, trial, 0)) != 0
1305 /* Update next_trial, in case try_split succeeded. */
1306 && (next_trial = next_nonnote_insn (trial))
1307 /* Likewise THREAD. */
1308 && (thread = oldtrial == thread ? trial : thread)
1309 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1310 /* Have to test this condition if annul condition is different
1311 from (and less restrictive than) non-annulling one. */
1312 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1315 if (! annul_p)
1317 update_block (trial, thread);
1318 if (trial == thread)
1319 thread = next_active_insn (thread);
1321 delete_related_insns (trial);
1322 INSN_FROM_TARGET_P (next_to_match) = 0;
1324 else
1325 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1327 if (++slot_number == num_slots)
1328 break;
1330 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1333 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1334 mark_referenced_resources (trial, &needed, true);
1337 /* See if we stopped on a filled insn. If we did, try to see if its
1338 delay slots match. */
1339 if (slot_number != num_slots
1340 && trial && NONJUMP_INSN_P (trial)
1341 && GET_CODE (PATTERN (trial)) == SEQUENCE
1342 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1343 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1345 rtx pat = PATTERN (trial);
1346 rtx filled_insn = XVECEXP (pat, 0, 0);
1348 /* Account for resources set/needed by the filled insn. */
1349 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1350 mark_referenced_resources (filled_insn, &needed, true);
1352 for (i = 1; i < XVECLEN (pat, 0); i++)
1354 rtx dtrial = XVECEXP (pat, 0, i);
1356 if (! insn_references_resource_p (dtrial, &set, true)
1357 && ! insn_sets_resource_p (dtrial, &set, true)
1358 && ! insn_sets_resource_p (dtrial, &needed, true)
1359 #ifdef HAVE_cc0
1360 && ! sets_cc0_p (PATTERN (dtrial))
1361 #endif
1362 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1363 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1365 if (! annul_p)
1367 rtx new_rtx;
1369 update_block (dtrial, thread);
1370 new_rtx = delete_from_delay_slot (dtrial);
1371 if (INSN_DELETED_P (thread))
1372 thread = new_rtx;
1373 INSN_FROM_TARGET_P (next_to_match) = 0;
1375 else
1376 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1377 merged_insns);
1379 if (++slot_number == num_slots)
1380 break;
1382 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1384 else
1386 /* Keep track of the set/referenced resources for the delay
1387 slots of any trial insns we encounter. */
1388 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1389 mark_referenced_resources (dtrial, &needed, true);
1394 /* If all insns in the delay slot have been matched and we were previously
1395 annulling the branch, we no longer need to.  In that case delete all the
1396 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1397 the delay list so that we know that it isn't only being used at the
1398 target. */
1399 if (slot_number == num_slots && annul_p)
1401 for (; merged_insns; merged_insns = XEXP (merged_insns, 1))
1403 if (GET_MODE (merged_insns) == SImode)
1405 rtx new_rtx;
1407 update_block (XEXP (merged_insns, 0), thread);
1408 new_rtx = delete_from_delay_slot (XEXP (merged_insns, 0));
1409 if (INSN_DELETED_P (thread))
1410 thread = new_rtx;
1412 else
1414 update_block (XEXP (merged_insns, 0), thread);
1415 delete_related_insns (XEXP (merged_insns, 0));
1419 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1421 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1422 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1426 /* See if INSN is redundant with an insn in front of TARGET. Often this
1427 is called when INSN is a candidate for a delay slot of TARGET.
1428 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1429 of INSN. Often INSN will be redundant with an insn in a delay slot of
1430 some previous insn. This happens when we have a series of branches to the
1431 same label; in that case the first insn at the target might want to go
1432 into each of the delay slots.
1434 If we are not careful, this routine can take up a significant fraction
1435 of the total compilation time (4%), but only wins rarely. Hence we
1436 speed this routine up by making two passes. The first pass goes back
1437 until it hits a label and sees if it finds an insn with an identical
1438 pattern. Only in this (relatively rare) event does it check for
1439 data conflicts.
1441 We do not split insns we encounter. This could cause us not to find a
1442 redundant insn, but the cost of splitting seems greater than the possible
1443 gain in rare cases. */
1445 static rtx
1446 redundant_insn (rtx insn, rtx target, rtx delay_list)
1448 rtx target_main = target;
1449 rtx ipat = PATTERN (insn);
1450 rtx trial, pat;
1451 struct resources needed, set;
1452 int i;
1453 unsigned insns_to_search;
1455 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1456 are allowed to not actually assign to such a register. */
1457 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1458 return 0;
1460 /* Scan backwards looking for a match. */
1461 for (trial = PREV_INSN (target),
1462 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1463 trial && insns_to_search > 0;
1464 trial = PREV_INSN (trial))
1466 if (LABEL_P (trial))
1467 return 0;
1469 if (!INSN_P (trial))
1470 continue;
1471 --insns_to_search;
1473 pat = PATTERN (trial);
1474 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1475 continue;
1477 if (GET_CODE (pat) == SEQUENCE)
1479 /* Stop for a CALL and its delay slots because it is difficult to
1480 track its resource needs correctly. */
1481 if (CALL_P (XVECEXP (pat, 0, 0)))
1482 return 0;
1484 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1485 slots because it is difficult to track its resource needs
1486 correctly. */
1488 #ifdef INSN_SETS_ARE_DELAYED
1489 if (INSN_SETS_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1490 return 0;
1491 #endif
1493 #ifdef INSN_REFERENCES_ARE_DELAYED
1494 if (INSN_REFERENCES_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1495 return 0;
1496 #endif
1498 /* See if any of the insns in the delay slot match, updating
1499 resource requirements as we go. */
1500 for (i = XVECLEN (pat, 0) - 1; i > 0; i--)
1501 if (GET_CODE (XVECEXP (pat, 0, i)) == GET_CODE (insn)
1502 && rtx_equal_p (PATTERN (XVECEXP (pat, 0, i)), ipat)
1503 && ! find_reg_note (XVECEXP (pat, 0, i), REG_UNUSED, NULL_RTX))
1504 break;
1506 /* If found a match, exit this loop early. */
1507 if (i > 0)
1508 break;
1511 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1512 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1513 break;
1516 /* If we didn't find an insn that matches, return 0. */
1517 if (trial == 0)
1518 return 0;
1520 /* See what resources this insn sets and needs. If they overlap, or
1521 if this insn references CC0, it can't be redundant. */
1523 CLEAR_RESOURCE (&needed);
1524 CLEAR_RESOURCE (&set);
1525 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1526 mark_referenced_resources (insn, &needed, true);
1528 /* If TARGET is a SEQUENCE, get the main insn. */
1529 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1530 target_main = XVECEXP (PATTERN (target), 0, 0);
1532 if (resource_conflicts_p (&needed, &set)
1533 #ifdef HAVE_cc0
1534 || reg_mentioned_p (cc0_rtx, ipat)
1535 #endif
1536 /* The insn requiring the delay may not set anything needed or set by
1537 INSN. */
1538 || insn_sets_resource_p (target_main, &needed, true)
1539 || insn_sets_resource_p (target_main, &set, true))
1540 return 0;
1542 /* Insns we pass may not set either NEEDED or SET, so merge them for
1543 simpler tests. */
1544 needed.memory |= set.memory;
1545 needed.unch_memory |= set.unch_memory;
1546 IOR_HARD_REG_SET (needed.regs, set.regs);
1548 /* This insn isn't redundant if it conflicts with an insn that either is
1549 or will be in a delay slot of TARGET. */
1551 while (delay_list)
1553 if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, true))
1554 return 0;
1555 delay_list = XEXP (delay_list, 1);
1558 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1559 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1560 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1561 true))
1562 return 0;
1564 /* Scan backwards until we reach a label or an insn that uses something
1565 INSN sets or sets something INSN uses or sets. */
1567 for (trial = PREV_INSN (target),
1568 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1569 trial && !LABEL_P (trial) && insns_to_search > 0;
1570 trial = PREV_INSN (trial))
1572 if (!INSN_P (trial))
1573 continue;
1574 --insns_to_search;
1576 pat = PATTERN (trial);
1577 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1578 continue;
1580 if (GET_CODE (pat) == SEQUENCE)
1582 bool annul_p = false;
1583 rtx control = XVECEXP (pat, 0, 0);
1585 /* If this is a CALL_INSN and its delay slots, it is hard to track
1586 the resource needs properly, so give up. */
1587 if (CALL_P (control))
1588 return 0;
1590 /* If this is an INSN or JUMP_INSN with delayed effects, it
1591 is hard to track the resource needs properly, so give up. */
1593 #ifdef INSN_SETS_ARE_DELAYED
1594 if (INSN_SETS_ARE_DELAYED (control))
1595 return 0;
1596 #endif
1598 #ifdef INSN_REFERENCES_ARE_DELAYED
1599 if (INSN_REFERENCES_ARE_DELAYED (control))
1600 return 0;
1601 #endif
1603 if (JUMP_P (control))
1604 annul_p = INSN_ANNULLED_BRANCH_P (control);
1606 /* See if any of the insns in the delay slot match, updating
1607 resource requirements as we go. */
1608 for (i = XVECLEN (pat, 0) - 1; i > 0; i--)
1610 rtx candidate = XVECEXP (pat, 0, i);
1612 /* If an insn will be annulled if the branch is false, it isn't
1613 considered as a possible duplicate insn. */
1614 if (rtx_equal_p (PATTERN (candidate), ipat)
1615 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1617 /* Show that this insn will be used in the sequel. */
1618 INSN_FROM_TARGET_P (candidate) = 0;
1619 return candidate;
1622 /* Unless this is an annulled insn from the target of a branch,
1623 we must stop if it sets anything needed or set by INSN. */
1624 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1625 && insn_sets_resource_p (candidate, &needed, true))
1626 return 0;
1629 /* If the insn requiring the delay slot conflicts with INSN, we
1630 must stop. */
1631 if (insn_sets_resource_p (control, &needed, true))
1632 return 0;
1634 else
1636 /* See if TRIAL is the same as INSN. */
1637 pat = PATTERN (trial);
1638 if (rtx_equal_p (pat, ipat))
1639 return trial;
1641 /* Can't go any further if TRIAL conflicts with INSN. */
1642 if (insn_sets_resource_p (trial, &needed, true))
1643 return 0;
1647 return 0;
1650 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1651 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1652 is nonzero, we are allowed to fall into this thread; otherwise, we are
1653 not.
1655 If LABEL is used more than once or we pass a label other than LABEL before
1656 finding an active insn, we do not own this thread. */
1658 static int
1659 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1661 rtx active_insn;
1662 rtx insn;
1664 /* We don't own the function end. */
1665 if (thread == 0 || ANY_RETURN_P (thread))
1666 return 0;
1668 /* Get the first active insn, or THREAD, if it is an active insn. */
1669 active_insn = next_active_insn (PREV_INSN (thread));
1671 for (insn = thread; insn != active_insn; insn = NEXT_INSN (insn))
1672 if (LABEL_P (insn)
1673 && (insn != label || LABEL_NUSES (insn) != 1))
1674 return 0;
1676 if (allow_fallthrough)
1677 return 1;
1679 /* Ensure that we reach a BARRIER before any insn or label. */
1680 for (insn = prev_nonnote_insn (thread);
1681 insn == 0 || !BARRIER_P (insn);
1682 insn = prev_nonnote_insn (insn))
1683 if (insn == 0
1684 || LABEL_P (insn)
1685 || (NONJUMP_INSN_P (insn)
1686 && GET_CODE (PATTERN (insn)) != USE
1687 && GET_CODE (PATTERN (insn)) != CLOBBER))
1688 return 0;
1690 return 1;
1693 /* Called when INSN is being moved from a location near the target of a jump.
1694 We leave a marker of the form (use (INSN)) immediately in front
1695 of WHERE for mark_target_live_regs. These markers will be deleted when
1696 reorg finishes.
1698 We used to try to update the live status of registers if WHERE is at
1699 the start of a basic block, but that can't work since we may remove a
1700 BARRIER in relax_delay_slots. */
1702 static void
1703 update_block (rtx insn, rtx where)
1705 /* Ignore if this was in a delay slot and it came from the target of
1706 a branch. */
1707 if (INSN_FROM_TARGET_P (insn))
1708 return;
1710 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1712 /* INSN might be making a value live in a block where it didn't use to
1713 be. So recompute liveness information for this block. */
1715 incr_ticks_for_insn (insn);
1718 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1719 the basic block containing the jump. */
1721 static int
1722 reorg_redirect_jump (rtx jump, rtx nlabel)
1724 incr_ticks_for_insn (jump);
1725 return redirect_jump (jump, nlabel, 1);
1728 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1729 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1730 that reference values used in INSN. If we find one, then we move the
1731 REG_DEAD note to INSN.
1733 This is needed to handle the case where a later insn (after INSN) has a
1734 REG_DEAD note for a register used by INSN, and this later insn subsequently
1735 gets moved before a CODE_LABEL because it is a redundant insn. In this
1736 case, mark_target_live_regs may be confused into thinking the register
1737 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
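/* Illustrative example (added): suppose INSN uses r4 and some insn P between
   INSN and DELAYED_INSN carries a REG_DEAD note for r4.  After INSN moves
   into the delay slot, r4 really dies at INSN, so the note is detached from
   P and reattached to INSN by the loop below.  */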
1739 static void
1740 update_reg_dead_notes (rtx insn, rtx delayed_insn)
1742 rtx p, link, next;
1744 for (p = next_nonnote_insn (insn); p != delayed_insn;
1745 p = next_nonnote_insn (p))
1746 for (link = REG_NOTES (p); link; link = next)
1748 next = XEXP (link, 1);
1750 if (REG_NOTE_KIND (link) != REG_DEAD
1751 || !REG_P (XEXP (link, 0)))
1752 continue;
1754 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1756 /* Move the REG_DEAD note from P to INSN. */
1757 remove_note (p, link);
1758 XEXP (link, 1) = REG_NOTES (insn);
1759 REG_NOTES (insn) = link;
1764 /* Called when an insn redundant with start_insn is deleted. If there
1765 is a REG_DEAD note for the target of start_insn between start_insn
1766 and stop_insn, then the REG_DEAD note needs to be deleted since the
1767 value no longer dies there.
1769 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1770 confused into thinking the register is dead. */
1772 static void
1773 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1775 rtx p, link, next;
1777 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1778 p = next_nonnote_insn (p))
1779 for (link = REG_NOTES (p); link; link = next)
1781 next = XEXP (link, 1);
1783 if (REG_NOTE_KIND (link) != REG_DEAD
1784 || !REG_P (XEXP (link, 0)))
1785 continue;
1787 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1789 remove_note (p, link);
1790 return;
1795 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1797 This handles the case of udivmodXi4 instructions which optimize their
1798 output depending on whether any REG_UNUSED notes are present.
1799 We must make sure that INSN calculates as many results as REDUNDANT_INSN
1800 does. */
1802 static void
1803 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1805 rtx link, next;
1807 for (link = REG_NOTES (insn); link; link = next)
1809 next = XEXP (link, 1);
1811 if (REG_NOTE_KIND (link) != REG_UNUSED
1812 || !REG_P (XEXP (link, 0)))
1813 continue;
1815 if (! find_regno_note (redundant_insn, REG_UNUSED,
1816 REGNO (XEXP (link, 0))))
1817 remove_note (insn, link);
1821 /* Return the label before INSN, or put a new label there. */
1823 static rtx
1824 get_label_before (rtx insn)
1826 rtx label;
1828 /* Find an existing label at this point
1829 or make a new one if there is none. */
1830 label = prev_nonnote_insn (insn);
1832 if (label == 0 || !LABEL_P (label))
1834 rtx prev = PREV_INSN (insn);
1836 label = gen_label_rtx ();
1837 emit_label_after (label, prev);
1838 LABEL_NUSES (label) = 0;
1840 return label;
1843 /* Scan a function looking for insns that need a delay slot and find insns to
1844 put into the delay slot.
1846 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1847 as calls). We do these first since we don't want jump insns (that are
1848 easier to fill) to get the only insns that could be used for non-jump insns.
1849 When it is zero, only try to fill JUMP_INSNs.
1851 When slots are filled in this manner, the insns (including the
1852 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1853 it is possible to tell whether a delay slot has really been filled
1854 or not. `final' knows how to deal with this, by communicating
1855 through FINAL_SEQUENCE. */
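/* Outline of the searches performed below (summary added for clarity):
   (1) absorb an unconditional jump that immediately follows INSN, (2) scan
   backwards from INSN for independent insns, and (3) scan forwards past INSN
   -- and, for a simple jump, into its target -- for insns that are safe to
   execute in the slot.  */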
1857 static void
1858 fill_simple_delay_slots (int non_jumps_p)
1860 rtx insn, pat, trial, next_trial;
1861 int i;
1862 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1863 struct resources needed, set;
1864 int slots_to_fill, slots_filled;
1865 rtx delay_list;
1867 for (i = 0; i < num_unfilled_slots; i++)
1869 int flags;
1870 /* Get the next insn to fill. If it has already had any slots assigned,
1871 we can't do anything with it. Maybe we'll improve this later. */
1873 insn = unfilled_slots_base[i];
1874 if (insn == 0
1875 || INSN_DELETED_P (insn)
1876 || (NONJUMP_INSN_P (insn)
1877 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1878 || (JUMP_P (insn) && non_jumps_p)
1879 || (!JUMP_P (insn) && ! non_jumps_p))
1880 continue;
1882 /* It may have been that this insn used to need delay slots, but
1883 now doesn't; ignore in that case. This can happen, for example,
1884 on the HP PA RISC, where the number of delay slots depends on
1885 what insns are nearby. */
1886 slots_to_fill = num_delay_slots (insn);
1888 Some machine descriptions have defined instructions to have
1889 delay slots only in certain circumstances which may depend on
1890 nearby insns (which change due to reorg's actions).
1892 For example, the PA port normally has delay slots for unconditional
1893 jumps.
1895 However, the PA port claims such jumps do not have a delay slot
1896 if they are immediate successors of certain CALL_INSNs. This
1897 allows the port to favor filling the delay slot of the call with
1898 the unconditional jump. */
1899 if (slots_to_fill == 0)
1900 continue;
1902 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1903 says how many. After initialization, first try optimizing
1905	call _foo		call _foo
1906	nop			add %o7,.-L1,%o7
1907	b,a L1
1910 If this case applies, the delay slot of the call is filled with
1911 the unconditional jump. This is done first to avoid having the
1912 delay slot of the call filled in the backward scan. Also, since
1913 the unconditional jump is likely to also have a delay slot, that
1914 insn must exist when it is subsequently scanned.
1916 This is tried on each insn with delay slots as some machines
1917 have insns which perform calls, but are not represented as
1918 CALL_INSNs. */
1920 slots_filled = 0;
1921 delay_list = 0;
1923 if (JUMP_P (insn))
1924 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1925 else
1926 flags = get_jump_flags (insn, NULL_RTX);
1928 if ((trial = next_active_insn (insn))
1929 && JUMP_P (trial)
1930 && simplejump_p (trial)
1931 && eligible_for_delay (insn, slots_filled, trial, flags)
1932 && no_labels_between_p (insn, trial)
1933 && ! can_throw_internal (trial))
1935 rtx *tmp;
1936 slots_filled++;
1937 delay_list = add_to_delay_list (trial, delay_list);
1939 /* TRIAL may have had its delay slot filled, then unfilled. When
1940 the delay slot is unfilled, TRIAL is placed back on the unfilled
1941 slots obstack. Unfortunately, it is placed on the end of the
1942 obstack, not in its original location. Therefore, we must search
1943 from entry i + 1 to the end of the unfilled slots obstack to
1944 try and find TRIAL. */
1945 tmp = &unfilled_slots_base[i + 1];
1946 while (*tmp != trial && tmp != unfilled_slots_next)
1947 tmp++;
1949 /* Remove the unconditional jump from consideration for delay slot
1950 filling and unthread it. */
1951 if (*tmp == trial)
1952 *tmp = 0;
1954 rtx next = NEXT_INSN (trial);
1955 rtx prev = PREV_INSN (trial);
1956 if (prev)
1957 NEXT_INSN (prev) = next;
1958 if (next)
1959 PREV_INSN (next) = prev;
1963 /* Now, scan backwards from the insn to search for a potential
1964 delay-slot candidate. Stop searching when a label or jump is hit.
1966 For each candidate, if it is to go into the delay slot (moved
1967 forward in execution sequence), it must not need or set any resources
1968 that were set by later insns and must not set any resources that
1969 are needed for those insns.
1971 The delay slot insn itself sets resources unless it is a call
1972 (in which case the called routine, not the insn itself, is doing
1973 the setting). */
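/* Illustration (added, hypothetical single-slot target):

	add	r2,r3,r4	; independent of the call
	call	_bar		; needs one delay slot

   can become

	call	_bar
	add	r2,r3,r4	; now in the delay slot

   since the add neither uses nor sets anything the call needs or sets.  */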
1975 if (slots_filled < slots_to_fill)
1977 CLEAR_RESOURCE (&needed);
1978 CLEAR_RESOURCE (&set);
1979 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
1980 mark_referenced_resources (insn, &needed, false);
1982 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
1983 trial = next_trial)
1985 next_trial = prev_nonnote_insn (trial);
1987 /* This must be an INSN or CALL_INSN. */
1988 pat = PATTERN (trial);
1990 /* Stand-alone USE and CLOBBER are just for flow. */
1991 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1992 continue;
1994 /* Check for resource conflict first, to avoid unnecessary
1995 splitting. */
1996 if (! insn_references_resource_p (trial, &set, true)
1997 && ! insn_sets_resource_p (trial, &set, true)
1998 && ! insn_sets_resource_p (trial, &needed, true)
1999 #ifdef HAVE_cc0
2000 /* Can't separate set of cc0 from its use. */
2001 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2002 #endif
2003 && ! can_throw_internal (trial))
2005 trial = try_split (pat, trial, 1);
2006 next_trial = prev_nonnote_insn (trial);
2007 if (eligible_for_delay (insn, slots_filled, trial, flags))
2009 /* In this case, we are searching backward, so if we
2010 find insns to put on the delay list, we want
2011 to put them at the head, rather than the
2012 tail, of the list. */
2014 update_reg_dead_notes (trial, insn);
2015 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2016 trial, delay_list);
2017 update_block (trial, trial);
2018 delete_related_insns (trial);
2019 if (slots_to_fill == ++slots_filled)
2020 break;
2021 continue;
2025 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2026 mark_referenced_resources (trial, &needed, true);
2030 /* If all needed slots haven't been filled, we come here. */
2032 /* Try to optimize case of jumping around a single insn. */
2033 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2034 if (slots_filled != slots_to_fill
2035 && delay_list == 0
2036 && JUMP_P (insn)
2037 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2038 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2040 delay_list = optimize_skip (insn);
2041 if (delay_list)
2042 slots_filled += 1;
2044 #endif
2046 /* Try to get insns from beyond the insn needing the delay slot.
2047 These insns can neither set nor reference resources set in insns being
2048 skipped, cannot set resources in the insn being skipped, and, if this
2049 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2050 call might not return).
2052 There used to be code which continued past the target label if
2053 we saw all uses of the target label. This code did not work,
2054 because it failed to account for some instructions which were
2055 both annulled and marked as from the target. This can happen as a
2056 result of optimize_skip. Since this code was redundant with
2057 fill_eager_delay_slots anyway, it was just deleted. */
2059 if (slots_filled != slots_to_fill
2060 /* If this instruction could throw an exception which is
2061 caught in the same function, then it's not safe to fill
2062 the delay slot with an instruction from beyond this
2063 point. For example, consider:
2065 int i = 2;
2067 try {
2068 f();
2069 i = 3;
2070 } catch (...) {}
2072 return i;
2074 Even though `i' is a local variable, we must be sure not
2075 to put `i = 3' in the delay slot if `f' might throw an
2076 exception.
2078 Presumably, we should also check to see if we could get
2079 back to this function via `setjmp'. */
2080 && ! can_throw_internal (insn)
2081 && !JUMP_P (insn))
2083 int maybe_never = 0;
2084 rtx pat, trial_delay;
2086 CLEAR_RESOURCE (&needed);
2087 CLEAR_RESOURCE (&set);
2088 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2089 mark_referenced_resources (insn, &needed, true);
2091 if (CALL_P (insn))
2092 maybe_never = 1;
2094 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2095 trial = next_trial)
2097 next_trial = next_nonnote_insn (trial);
2099 /* This must be an INSN or CALL_INSN. */
2100 pat = PATTERN (trial);
2102 /* Stand-alone USE and CLOBBER are just for flow. */
2103 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2104 continue;
2106 /* If this already has filled delay slots, get the insn needing
2107 the delay slots. */
2108 if (GET_CODE (pat) == SEQUENCE)
2109 trial_delay = XVECEXP (pat, 0, 0);
2110 else
2111 trial_delay = trial;
2113 /* Stop our search when seeing a jump. */
2114 if (JUMP_P (trial_delay))
2115 break;
2117 /* See if we have a resource problem before we try to split. */
2118 if (GET_CODE (pat) != SEQUENCE
2119 && ! insn_references_resource_p (trial, &set, true)
2120 && ! insn_sets_resource_p (trial, &set, true)
2121 && ! insn_sets_resource_p (trial, &needed, true)
2122 #ifdef HAVE_cc0
2123 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2124 #endif
2125 && ! (maybe_never && may_trap_or_fault_p (pat))
2126 && (trial = try_split (pat, trial, 0))
2127 && eligible_for_delay (insn, slots_filled, trial, flags)
2128 && ! can_throw_internal(trial))
2130 next_trial = next_nonnote_insn (trial);
2131 delay_list = add_to_delay_list (trial, delay_list);
2132 #ifdef HAVE_cc0
2133 if (reg_mentioned_p (cc0_rtx, pat))
2134 link_cc0_insns (trial);
2135 #endif
2136 delete_related_insns (trial);
2137 if (slots_to_fill == ++slots_filled)
2138 break;
2139 continue;
2142 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2143 mark_referenced_resources (trial, &needed, true);
2145 /* Ensure we don't put insns between the setting of cc and the
2146 comparison by moving a setting of cc into an earlier delay
2147 slot since these insns could clobber the condition code. */
2148 set.cc = 1;
2150 /* If this is a call, we might not get here. */
2151 if (CALL_P (trial_delay))
2152 maybe_never = 1;
2155 /* If there are slots left to fill and our search was stopped by an
2156 unconditional branch, try the insn at the branch target. We can
2157 redirect the branch if it works.
2159 Don't do this if the insn at the branch target is a branch. */
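/* Illustration (added, hypothetical target): the forward scan stopped at an
   unconditional "b L2" whose target's first active insn is a safe, eligible
   insn X.  A copy of X goes into INSN's delay slot and the "b L2" is
   redirected to the label just past X, so X is not executed twice on the
   branch path.  */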
2160 if (slots_to_fill != slots_filled
2161 && trial
2162 && jump_to_label_p (trial)
2163 && simplejump_p (trial)
2164 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2165 && ! (NONJUMP_INSN_P (next_trial)
2166 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2167 && !JUMP_P (next_trial)
2168 && ! insn_references_resource_p (next_trial, &set, true)
2169 && ! insn_sets_resource_p (next_trial, &set, true)
2170 && ! insn_sets_resource_p (next_trial, &needed, true)
2171 #ifdef HAVE_cc0
2172 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2173 #endif
2174 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2175 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2176 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2177 && ! can_throw_internal (trial))
2179 /* See comment in relax_delay_slots about necessity of using
2180 next_real_insn here. */
2181 rtx new_label = next_real_insn (next_trial);
2183 if (new_label != 0)
2184 new_label = get_label_before (new_label);
2185 else
2186 new_label = find_end_label (simple_return_rtx);
2188 if (new_label)
2190 delay_list
2191 = add_to_delay_list (copy_delay_slot_insn (next_trial),
2192 delay_list);
2193 slots_filled++;
2194 reorg_redirect_jump (trial, new_label);
2199 /* If this is an unconditional jump, then try to get insns from the
2200 target of the jump. */
2201 if (JUMP_P (insn)
2202 && simplejump_p (insn)
2203 && slots_filled != slots_to_fill)
2204 delay_list
2205 = fill_slots_from_thread (insn, const_true_rtx,
2206 next_active_insn (JUMP_LABEL (insn)),
2207 NULL, 1, 1,
2208 own_thread_p (JUMP_LABEL (insn),
2209 JUMP_LABEL (insn), 0),
2210 slots_to_fill, &slots_filled,
2211 delay_list);
2213 if (delay_list)
2214 unfilled_slots_base[i]
2215 = emit_delay_sequence (insn, delay_list, slots_filled);
2217 if (slots_to_fill == slots_filled)
2218 unfilled_slots_base[i] = 0;
2220 note_delay_statistics (slots_filled, 0);
2224 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2225 return the ultimate label reached by any such chain of jumps.
2226 Return a suitable return rtx if the chain ultimately leads to a
2227 return instruction.
2228 If LABEL is not followed by a jump, return LABEL.
2229 If the chain loops or we can't find the end, return LABEL,
2230 since that tells caller to avoid changing the insn.
2231 If the returned label is obtained by following a REG_CROSSING_JUMP
2232 jump, set *CROSSING to true, otherwise set it to false. */
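/* Illustrative example (added): with "L1: b L2 ... L2: b L3", following from
   L1 yields L3.  The chase stops after ten hops, on a cycle, at a jump table,
   or at a jump the target hook refuses to follow; in those cases the label
   reached so far (or the original LABEL) is returned.  */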
2234 static rtx
2235 follow_jumps (rtx label, rtx jump, bool *crossing)
2237 rtx insn;
2238 rtx next;
2239 rtx value = label;
2240 int depth;
2242 *crossing = false;
2243 if (ANY_RETURN_P (label))
2244 return label;
2245 for (depth = 0;
2246 (depth < 10
2247 && (insn = next_active_insn (value)) != 0
2248 && JUMP_P (insn)
2249 && JUMP_LABEL (insn) != NULL_RTX
2250 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2251 || ANY_RETURN_P (PATTERN (insn)))
2252 && (next = NEXT_INSN (insn))
2253 && BARRIER_P (next));
2254 depth++)
2256 rtx this_label = JUMP_LABEL (insn);
2257 rtx tem;
2259 /* If we have found a cycle, make the insn jump to itself. */
2260 if (this_label == label)
2261 return label;
2262 if (ANY_RETURN_P (this_label))
2263 return this_label;
2264 tem = next_active_insn (this_label);
2265 if (tem && JUMP_TABLE_DATA_P (tem))
2266 break;
2268 if (!targetm.can_follow_jump (jump, insn))
2269 break;
2270 if (!*crossing)
2271 *crossing
2272 = find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX) != NULL_RTX;
2273 value = this_label;
2275 if (depth == 10)
2276 return label;
2277 return value;
2280 /* Try to find insns to place in delay slots.
2282 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2283 or is an unconditional branch if CONDITION is const_true_rtx.
2284 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2286 THREAD is a flow-of-control, either the insns to be executed if the
2287 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2289 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2290 to see if any potential delay slot insns set things needed there.
2292 LIKELY is nonzero if it is extremely likely that the branch will be
2293 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2294 end of a loop back up to the top.
2296 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2297 thread. I.e., it is the fallthrough code of our jump or the target of the
2298 jump when we are the only jump going there.
2300 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2301 case, we can only take insns from the head of the thread for our delay
2302 slot. We then adjust the jump to point after the insns we have taken. */
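/* Summary of the loop below (added for orientation): walk THREAD insn by
   insn, collecting insns for the delay slots.  An insn qualifies either
   unconditionally (safe on both paths) or only in an annulled slot (safe on
   the THREAD path alone), in which case MUST_ANNUL is set.  NEW_THREAD
   tracks where the remaining thread now begins; INSN is redirected there at
   the end if that point moved.  */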
2304 static rtx
2305 fill_slots_from_thread (rtx insn, rtx condition, rtx thread,
2306 rtx opposite_thread, int likely, int thread_if_true,
2307 int own_thread, int slots_to_fill,
2308 int *pslots_filled, rtx delay_list)
2310 rtx new_thread;
2311 struct resources opposite_needed, set, needed;
2312 rtx trial;
2313 int lose = 0;
2314 int must_annul = 0;
2315 int flags;
2317 /* Validate our arguments. */
2318 gcc_assert(condition != const_true_rtx || thread_if_true);
2319 gcc_assert(own_thread || thread_if_true);
2321 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2323 If our thread is the end of the subroutine, we can't get any delay
2324 insns from that. */
2325 if (thread == NULL_RTX || ANY_RETURN_P (thread))
2326 return delay_list;
2328 /* If this is an unconditional branch, nothing is needed at the
2329 opposite thread. Otherwise, compute what is needed there. */
2330 if (condition == const_true_rtx)
2331 CLEAR_RESOURCE (&opposite_needed);
2332 else
2333 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2335 /* If the insn at THREAD can be split, do it here to avoid having to
2336 update THREAD and NEW_THREAD if it is done in the loop below. Also
2337 initialize NEW_THREAD. */
2339 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2341 /* Scan insns at THREAD. We are looking for an insn that can be removed
2342 from THREAD (it neither sets nor references resources that were set
2343 ahead of it and it doesn't set anything needed by the insns ahead of
2344 it) and that either can be placed in an annulling insn or isn't
2345 needed at OPPOSITE_THREAD. */
2347 CLEAR_RESOURCE (&needed);
2348 CLEAR_RESOURCE (&set);
2350 /* If we do not own this thread, we must stop as soon as we find
2351 something that we can't put in a delay slot, since all we can do
2352 is branch into THREAD at a later point. Therefore, labels stop
2353 the search if this is not the `true' thread. */
2355 for (trial = thread;
2356 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2357 trial = next_nonnote_insn (trial))
2359 rtx pat, old_trial;
2361 /* If we have passed a label, we no longer own this thread. */
2362 if (LABEL_P (trial))
2364 own_thread = 0;
2365 continue;
2368 pat = PATTERN (trial);
2369 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2370 continue;
2372 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2373 don't separate or copy insns that set and use CC0. */
2374 if (! insn_references_resource_p (trial, &set, true)
2375 && ! insn_sets_resource_p (trial, &set, true)
2376 && ! insn_sets_resource_p (trial, &needed, true)
2377 #ifdef HAVE_cc0
2378 && ! (reg_mentioned_p (cc0_rtx, pat)
2379 && (! own_thread || ! sets_cc0_p (pat)))
2380 #endif
2381 && ! can_throw_internal (trial))
2383 rtx prior_insn;
2385 /* If TRIAL is redundant with some insn before INSN, we don't
2386 actually need to add it to the delay list; we can merely pretend
2387 we did. */
2388 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2390 fix_reg_dead_note (prior_insn, insn);
2391 if (own_thread)
2393 update_block (trial, thread);
2394 if (trial == thread)
2396 thread = next_active_insn (thread);
2397 if (new_thread == trial)
2398 new_thread = thread;
2401 delete_related_insns (trial);
2403 else
2405 update_reg_unused_notes (prior_insn, trial);
2406 new_thread = next_active_insn (trial);
2409 continue;
2412 /* There are two ways we can win: If TRIAL doesn't set anything
2413 needed at the opposite thread and can't trap, or if it can
2414 go into an annulled delay slot. */
2415 if (!must_annul
2416 && (condition == const_true_rtx
2417 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2418 && ! may_trap_or_fault_p (pat)
2419 && ! RTX_FRAME_RELATED_P (trial))))
2421 old_trial = trial;
2422 trial = try_split (pat, trial, 0);
2423 if (new_thread == old_trial)
2424 new_thread = trial;
2425 if (thread == old_trial)
2426 thread = trial;
2427 pat = PATTERN (trial);
2428 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2429 goto winner;
2431 else if (0
2432 #ifdef ANNUL_IFTRUE_SLOTS
2433 || ! thread_if_true
2434 #endif
2435 #ifdef ANNUL_IFFALSE_SLOTS
2436 || thread_if_true
2437 #endif
2440 old_trial = trial;
2441 trial = try_split (pat, trial, 0);
2442 if (new_thread == old_trial)
2443 new_thread = trial;
2444 if (thread == old_trial)
2445 thread = trial;
2446 pat = PATTERN (trial);
2447 if ((must_annul || delay_list == NULL) && (thread_if_true
2448 ? check_annul_list_true_false (0, delay_list)
2449 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2450 : check_annul_list_true_false (1, delay_list)
2451 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2453 rtx temp;
2455 must_annul = 1;
2456 winner:
2458 #ifdef HAVE_cc0
2459 if (reg_mentioned_p (cc0_rtx, pat))
2460 link_cc0_insns (trial);
2461 #endif
2463 /* If we own this thread, delete the insn. If this is the
2464 destination of a branch, show that a basic block status
2465 may have been updated. In any case, mark the new
2466 starting point of this thread. */
2467 if (own_thread)
2469 rtx note;
2471 update_block (trial, thread);
2472 if (trial == thread)
2474 thread = next_active_insn (thread);
2475 if (new_thread == trial)
2476 new_thread = thread;
2479 /* We are moving this insn, not deleting it. We must
2480 temporarily increment the use count on any referenced
2481 label lest it be deleted by delete_related_insns. */
2482 for (note = REG_NOTES (trial);
2483 note != NULL_RTX;
2484 note = XEXP (note, 1))
2485 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2486 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2488 /* REG_LABEL_OPERAND could be
2489 NOTE_INSN_DELETED_LABEL too. */
2490 if (LABEL_P (XEXP (note, 0)))
2491 LABEL_NUSES (XEXP (note, 0))++;
2492 else
2493 gcc_assert (REG_NOTE_KIND (note)
2494 == REG_LABEL_OPERAND);
2496 if (jump_to_label_p (trial))
2497 LABEL_NUSES (JUMP_LABEL (trial))++;
2499 delete_related_insns (trial);
2501 for (note = REG_NOTES (trial);
2502 note != NULL_RTX;
2503 note = XEXP (note, 1))
2504 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2505 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2507 /* REG_LABEL_OPERAND could be
2508 NOTE_INSN_DELETED_LABEL too. */
2509 if (LABEL_P (XEXP (note, 0)))
2510 LABEL_NUSES (XEXP (note, 0))--;
2511 else
2512 gcc_assert (REG_NOTE_KIND (note)
2513 == REG_LABEL_OPERAND);
2515 if (jump_to_label_p (trial))
2516 LABEL_NUSES (JUMP_LABEL (trial))--;
2518 else
2519 new_thread = next_active_insn (trial);
2521 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2522 if (thread_if_true)
2523 INSN_FROM_TARGET_P (temp) = 1;
2525 delay_list = add_to_delay_list (temp, delay_list);
2527 if (slots_to_fill == ++(*pslots_filled))
2529 /* Even though we have filled all the slots, we
2530 may be branching to a location that has a
2531 redundant insn. Skip any if so. */
2532 while (new_thread && ! own_thread
2533 && ! insn_sets_resource_p (new_thread, &set, true)
2534 && ! insn_sets_resource_p (new_thread, &needed,
2535 true)
2536 && ! insn_references_resource_p (new_thread,
2537 &set, true)
2538 && (prior_insn
2539 = redundant_insn (new_thread, insn,
2540 delay_list)))
2542 /* We know we do not own the thread, so no need
2543 to call update_block and delete_insn. */
2544 fix_reg_dead_note (prior_insn, insn);
2545 update_reg_unused_notes (prior_insn, new_thread);
2546 new_thread = next_active_insn (new_thread);
2548 break;
2551 continue;
2556 /* This insn can't go into a delay slot. */
2557 lose = 1;
2558 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2559 mark_referenced_resources (trial, &needed, true);
2561 /* Ensure we don't put insns between the setting of cc and the comparison
2562 by moving a setting of cc into an earlier delay slot since these insns
2563 could clobber the condition code. */
2564 set.cc = 1;
2566 /* If this insn is a register-register copy and the next insn has
2567 a use of our destination, change it to use our source. That way,
2568 it will become a candidate for our delay slot the next time
2569 through this loop. This case occurs commonly in loops that
2570 scan a list.
2572 We could check for more complex cases than those tested below,
2573 but it doesn't seem worth it. It might also be a good idea to try
2574 to swap the two insns. That might do better.
2576 We can't do this if the next insn modifies our destination, because
2577 that would make the replacement into the insn invalid. We also can't
2578 do this if it modifies our source, because it might be an earlyclobber
2579 operand. This latter test also prevents updating the contents of
2580 a PRE_INC. We also can't do this if there's overlap of source and
2581 destination. Overlap may happen for larger-than-register-size modes. */
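/* Illustrative example (added):

	trial:	r5 = r9			; reg-reg copy we could not take
	next:	r6 = mem[r5]		; uses our destination

   Rewriting NEXT to use r9 instead of r5 removes its dependence on the copy,
   so NEXT may become a delay-slot candidate on a later iteration.  */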
2583 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2584 && REG_P (SET_SRC (pat))
2585 && REG_P (SET_DEST (pat))
2586 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2588 rtx next = next_nonnote_insn (trial);
2590 if (next && NONJUMP_INSN_P (next)
2591 && GET_CODE (PATTERN (next)) != USE
2592 && ! reg_set_p (SET_DEST (pat), next)
2593 && ! reg_set_p (SET_SRC (pat), next)
2594 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2595 && ! modified_in_p (SET_DEST (pat), next))
2596 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2600 /* If we stopped on a branch insn that has delay slots, see if we can
2601 steal some of the insns in those slots. */
2602 if (trial && NONJUMP_INSN_P (trial)
2603 && GET_CODE (PATTERN (trial)) == SEQUENCE
2604 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2606 /* If this is the `true' thread, we will want to follow the jump,
2607 so we can only do this if we have taken everything up to here. */
2608 if (thread_if_true && trial == new_thread)
2610 delay_list
2611 = steal_delay_list_from_target (insn, condition, PATTERN (trial),
2612 delay_list, &set, &needed,
2613 &opposite_needed, slots_to_fill,
2614 pslots_filled, &must_annul,
2615 &new_thread);
2616 /* If we owned the thread and are told that it branched
2617 elsewhere, make sure we own the thread at the new location. */
2618 if (own_thread && trial != new_thread)
2619 own_thread = own_thread_p (new_thread, new_thread, 0);
2621 else if (! thread_if_true)
2622 delay_list
2623 = steal_delay_list_from_fallthrough (insn, condition,
2624 PATTERN (trial),
2625 delay_list, &set, &needed,
2626 &opposite_needed, slots_to_fill,
2627 pslots_filled, &must_annul);
2630 /* If we haven't found anything for this delay slot and it is very
2631 likely that the branch will be taken, see if the insn at our target
2632 increments or decrements a register with an increment that does not
2633 depend on the destination register. If so, try to place the opposite
2634 arithmetic insn after the jump insn and put the arithmetic insn in the
2635 delay slot. If we can't do this, return. */
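/* Illustrative example (added): a likely-taken loop branch whose target
   begins with "r3 = r3 + 4".  The compensating "r3 = r3 - 4" is emitted
   right after the branch (executed only on fall through) and "r3 = r3 + 4"
   goes into the delay slot, so the taken path still sees the increment
   exactly once.  */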
2636 if (delay_list == 0 && likely
2637 && new_thread && !ANY_RETURN_P (new_thread)
2638 && NONJUMP_INSN_P (new_thread)
2639 && !RTX_FRAME_RELATED_P (new_thread)
2640 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2641 && asm_noperands (PATTERN (new_thread)) < 0)
2643 rtx pat = PATTERN (new_thread);
2644 rtx dest;
2645 rtx src;
2647 trial = new_thread;
2648 pat = PATTERN (trial);
2650 if (!NONJUMP_INSN_P (trial)
2651 || GET_CODE (pat) != SET
2652 || ! eligible_for_delay (insn, 0, trial, flags)
2653 || can_throw_internal (trial))
2654 return 0;
2656 dest = SET_DEST (pat), src = SET_SRC (pat);
2657 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2658 && rtx_equal_p (XEXP (src, 0), dest)
2659 && (!FLOAT_MODE_P (GET_MODE (src))
2660 || flag_unsafe_math_optimizations)
2661 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2662 && ! side_effects_p (pat))
2664 rtx other = XEXP (src, 1);
2665 rtx new_arith;
2666 rtx ninsn;
2668 /* If this is a constant adjustment, use the same code with
2669 the negated constant. Otherwise, reverse the sense of the
2670 arithmetic. */
2671 if (CONST_INT_P (other))
2672 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2673 negate_rtx (GET_MODE (src), other));
2674 else
2675 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2676 GET_MODE (src), dest, other);
2678 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2679 insn);
2681 if (recog_memoized (ninsn) < 0
2682 || (extract_insn (ninsn), ! constrain_operands (1)))
2684 delete_related_insns (ninsn);
2685 return 0;
2688 if (own_thread)
2690 update_block (trial, thread);
2691 if (trial == thread)
2693 thread = next_active_insn (thread);
2694 if (new_thread == trial)
2695 new_thread = thread;
2697 delete_related_insns (trial);
2699 else
2700 new_thread = next_active_insn (trial);
2702 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2703 if (thread_if_true)
2704 INSN_FROM_TARGET_P (ninsn) = 1;
2706 delay_list = add_to_delay_list (ninsn, NULL_RTX);
2707 (*pslots_filled)++;
2711 if (delay_list && must_annul)
2712 INSN_ANNULLED_BRANCH_P (insn) = 1;
2714 /* If we are to branch into the middle of this thread, find an appropriate
2715 label or make a new one if none, and redirect INSN to it. If we hit the
2716 end of the function, use the end-of-function label. */
2717 if (new_thread != thread)
2719 rtx label;
2720 bool crossing = false;
2722 gcc_assert (thread_if_true);
2724 if (new_thread && simplejump_or_return_p (new_thread)
2725 && redirect_with_delay_list_safe_p (insn,
2726 JUMP_LABEL (new_thread),
2727 delay_list))
2728 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn, &crossing);
2730 if (ANY_RETURN_P (new_thread))
2731 label = find_end_label (new_thread);
2732 else if (LABEL_P (new_thread))
2733 label = new_thread;
2734 else
2735 label = get_label_before (new_thread);
2737 if (label)
2739 reorg_redirect_jump (insn, label);
2740 if (crossing)
2741 set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
2745 return delay_list;
2748 /* Make another attempt to find insns to place in delay slots.
2750 We previously looked for insns located in front of the delay insn
2751 and, for non-jump delay insns, located behind the delay insn.
2753 Here only try to schedule jump insns and try to move insns from either
2754 the target or the following insns into the delay slot. If annulling is
2755 supported, we are likely to do this. Otherwise, we can do this only
2756 if safe. */
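/* Overview (added note): for each conditional jump the more likely thread is
   tried first -- the branch target when the jump is predicted taken, the
   fall-through otherwise -- and the other thread is tried only when the
   first yields nothing.  Insns taken from the less certain path generally
   end up in annulled slots.  */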
2758 static void
2759 fill_eager_delay_slots (void)
2761 rtx insn;
2762 int i;
2763 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2765 for (i = 0; i < num_unfilled_slots; i++)
2767 rtx condition;
2768 rtx target_label, insn_at_target, fallthrough_insn;
2769 rtx delay_list = 0;
2770 int own_target;
2771 int own_fallthrough;
2772 int prediction, slots_to_fill, slots_filled;
2774 insn = unfilled_slots_base[i];
2775 if (insn == 0
2776 || INSN_DELETED_P (insn)
2777 || !JUMP_P (insn)
2778 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2779 continue;
2781 slots_to_fill = num_delay_slots (insn);
2782 Some machine descriptions have defined instructions to have
2783 delay slots only in certain circumstances which may depend on
2784 nearby insns (which change due to reorg's actions).
2786 For example, the PA port normally has delay slots for unconditional
2787 jumps.
2789 However, the PA port claims such jumps do not have a delay slot
2790 if they are immediate successors of certain CALL_INSNs. This
2791 allows the port to favor filling the delay slot of the call with
2792 the unconditional jump. */
2793 if (slots_to_fill == 0)
2794 continue;
2796 slots_filled = 0;
2797 target_label = JUMP_LABEL (insn);
2798 condition = get_branch_condition (insn, target_label);
2800 if (condition == 0)
2801 continue;
2803 /* Get the next active fallthrough and target insns and see if we own
2804 them. Then see whether the branch is likely true. We don't need
2805 to do a lot of this for unconditional branches. */
2807 insn_at_target = first_active_target_insn (target_label);
2808 own_target = own_thread_p (target_label, target_label, 0);
2810 if (condition == const_true_rtx)
2812 own_fallthrough = 0;
2813 fallthrough_insn = 0;
2814 prediction = 2;
2816 else
2818 fallthrough_insn = next_active_insn (insn);
2819 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2820 prediction = mostly_true_jump (insn);
2823 /* If this insn is expected to branch, first try to get insns from our
2824 target, then our fallthrough insns. If it is not expected to branch,
2825 try the other order. */
2827 if (prediction > 0)
2829 delay_list
2830 = fill_slots_from_thread (insn, condition, insn_at_target,
2831 fallthrough_insn, prediction == 2, 1,
2832 own_target,
2833 slots_to_fill, &slots_filled, delay_list);
2835 if (delay_list == 0 && own_fallthrough)
2837 /* Even though we didn't find anything for delay slots,
2838 we might have found a redundant insn which we deleted
2839 from the thread that was filled. So we have to recompute
2840 the next insn at the target. */
2841 target_label = JUMP_LABEL (insn);
2842 insn_at_target = first_active_target_insn (target_label);
2844 delay_list
2845 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2846 insn_at_target, 0, 0,
2847 own_fallthrough,
2848 slots_to_fill, &slots_filled,
2849 delay_list);
2852 else
2854 if (own_fallthrough)
2855 delay_list
2856 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2857 insn_at_target, 0, 0,
2858 own_fallthrough,
2859 slots_to_fill, &slots_filled,
2860 delay_list);
2862 if (delay_list == 0)
2863 delay_list
2864 = fill_slots_from_thread (insn, condition, insn_at_target,
2865 next_active_insn (insn), 0, 1,
2866 own_target,
2867 slots_to_fill, &slots_filled,
2868 delay_list);
2871 if (delay_list)
2872 unfilled_slots_base[i]
2873 = emit_delay_sequence (insn, delay_list, slots_filled);
2875 if (slots_to_fill == slots_filled)
2876 unfilled_slots_base[i] = 0;
2878 note_delay_statistics (slots_filled, 1);
2882 static void delete_computation (rtx insn);
2884 /* Recursively delete prior insns that compute the value (used only by INSN
2885 which the caller is deleting) stored in the register mentioned by NOTE
2886 which is a REG_DEAD note associated with INSN. */
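/* Illustrative example (added): when deleting "r7 = r1 + r2" whose REG_DEAD
   note names r1, an earlier "r1 = mem[r0]" can be deleted too, provided
   nothing in between uses or sets r1.  If only part of a multi-word hard
   register becomes unused, the earlier insn is kept and gains REG_UNUSED
   notes for the unused words instead.  */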
2888 static void
2889 delete_prior_computation (rtx note, rtx insn)
2891 rtx our_prev;
2892 rtx reg = XEXP (note, 0);
2894 for (our_prev = prev_nonnote_insn (insn);
2895 our_prev && (NONJUMP_INSN_P (our_prev)
2896 || CALL_P (our_prev));
2897 our_prev = prev_nonnote_insn (our_prev))
2899 rtx pat = PATTERN (our_prev);
2901 /* If we reach a CALL which is not calling a const function
2902 or the callee pops the arguments, then give up. */
2903 if (CALL_P (our_prev)
2904 && (! RTL_CONST_CALL_P (our_prev)
2905 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2906 break;
2908 /* If we reach a SEQUENCE, it is too complex to try to
2909 do anything with it, so give up. We can be run during
2910 and after reorg, so SEQUENCE rtl can legitimately show
2911 up here. */
2912 if (GET_CODE (pat) == SEQUENCE)
2913 break;
2915 if (GET_CODE (pat) == USE
2916 && NONJUMP_INSN_P (XEXP (pat, 0)))
2917 /* reorg creates USEs that look like this. We leave them
2918 alone because reorg needs them for its own purposes. */
2919 break;
2921 if (reg_set_p (reg, pat))
2923 if (side_effects_p (pat) && !CALL_P (our_prev))
2924 break;
2926 if (GET_CODE (pat) == PARALLEL)
2928 /* If we find a SET of something else, we can't
2929 delete the insn. */
2931 int i;
2933 for (i = 0; i < XVECLEN (pat, 0); i++)
2935 rtx part = XVECEXP (pat, 0, i);
2937 if (GET_CODE (part) == SET
2938 && SET_DEST (part) != reg)
2939 break;
2942 if (i == XVECLEN (pat, 0))
2943 delete_computation (our_prev);
2945 else if (GET_CODE (pat) == SET
2946 && REG_P (SET_DEST (pat)))
2948 int dest_regno = REGNO (SET_DEST (pat));
2949 int dest_endregno = END_REGNO (SET_DEST (pat));
2950 int regno = REGNO (reg);
2951 int endregno = END_REGNO (reg);
2953 if (dest_regno >= regno
2954 && dest_endregno <= endregno)
2955 delete_computation (our_prev);
2957 /* We may have a multi-word hard register and some, but not
2958 all, of the words of the register are needed in subsequent
2959 insns. Write REG_UNUSED notes for those parts that were not
2960 needed. */
2961 else if (dest_regno <= regno
2962 && dest_endregno >= endregno)
2964 int i;
2966 add_reg_note (our_prev, REG_UNUSED, reg);
2968 for (i = dest_regno; i < dest_endregno; i++)
2969 if (! find_regno_note (our_prev, REG_UNUSED, i))
2970 break;
2972 if (i == dest_endregno)
2973 delete_computation (our_prev);
2977 break;
2980 /* If PAT references the register that dies here, it is an
2981 additional use. Hence any prior SET isn't dead. However, this
2982 insn becomes the new place for the REG_DEAD note. */
2983 if (reg_overlap_mentioned_p (reg, pat))
2985 XEXP (note, 1) = REG_NOTES (our_prev);
2986 REG_NOTES (our_prev) = note;
2987 break;
2992 /* Delete INSN and recursively delete insns that compute values used only
2993 by INSN. This uses the REG_DEAD notes computed during flow analysis.
2995 Look at all our REG_DEAD notes. If a previous insn does nothing other
2996 than set a register that dies in this insn, we can delete that insn
2997 as well.
2999 On machines with CC0, if CC0 is used in this insn, we may be able to
3000 delete the insn that set it. */
3002 static void
3003 delete_computation (rtx insn)
3005 rtx note, next;
3007 #ifdef HAVE_cc0
3008 if (reg_referenced_p (cc0_rtx, PATTERN (insn)))
3010 rtx prev = prev_nonnote_insn (insn);
3011 /* We assume that at this stage
3012 CC's are always set explicitly
3013 and always immediately before the jump that
3014 will use them. So if the previous insn
3015 exists to set the CC's, delete it
3016 (unless it performs auto-increments, etc.). */
3017 if (prev && NONJUMP_INSN_P (prev)
3018 && sets_cc0_p (PATTERN (prev)))
3020 if (sets_cc0_p (PATTERN (prev)) > 0
3021 && ! side_effects_p (PATTERN (prev)))
3022 delete_computation (prev);
3023 else
3024 /* Otherwise, show that cc0 won't be used. */
3025 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3028 #endif
3030 for (note = REG_NOTES (insn); note; note = next)
3032 next = XEXP (note, 1);
3034 if (REG_NOTE_KIND (note) != REG_DEAD
3035 /* Verify that the REG_NOTE is legitimate. */
3036 || !REG_P (XEXP (note, 0)))
3037 continue;
3039 delete_prior_computation (note, insn);
3042 delete_related_insns (insn);
3045 /* If all INSN does is set the pc, delete it,
3046 and delete the insn that set the condition codes for it
3047 if that's what the previous thing was. */
3049 static void
3050 delete_jump (rtx insn)
3052 rtx set = single_set (insn);
3054 if (set && GET_CODE (SET_DEST (set)) == PC)
3055 delete_computation (insn);
3058 static rtx
3059 label_before_next_insn (rtx x, rtx scan_limit)
3061 rtx insn = next_active_insn (x);
3062 while (insn)
3064 insn = PREV_INSN (insn);
3065 if (insn == scan_limit || insn == NULL_RTX)
3066 return NULL_RTX;
3067 if (LABEL_P (insn))
3068 break;
3070 return insn;
3074 /* Once we have tried two ways to fill a delay slot, make a pass over the
3075 code to try to improve the results and to do such things as more jump
3076 threading. */
3078 static void
3079 relax_delay_slots (rtx first)
3081 rtx insn, next, pat;
3082 rtx trial, delay_insn, target_label;
3084 /* Look at every JUMP_INSN and see if we can improve it. */
3085 for (insn = first; insn; insn = next)
3087 rtx other;
3088 bool crossing;
3090 next = next_active_insn (insn);
3092 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3093 the next insn, or jumps to a label that is not the last of a
3094 group of consecutive labels. */
3095 if (JUMP_P (insn)
3096 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3097 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3099 target_label
3100 = skip_consecutive_labels (follow_jumps (target_label, insn,
3101 &crossing));
3102 if (ANY_RETURN_P (target_label))
3103 target_label = find_end_label (target_label);
3105 if (target_label && next_active_insn (target_label) == next
3106 && ! condjump_in_parallel_p (insn))
3108 delete_jump (insn);
3109 continue;
3112 if (target_label && target_label != JUMP_LABEL (insn))
3114 reorg_redirect_jump (insn, target_label);
3115 if (crossing)
3116 set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
3119 /* See if this jump conditionally branches around an unconditional
3120 jump. If so, invert this jump and point it to the target of the
3121 second jump. */
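/* Illustration (added): a conditional "beq L1" immediately followed by an
   unconditional "b L2", where L1 labels the insn just after the "b L2".
   Inverting the condition gives a single "bne L2"; the unconditional jump is
   deleted, and L1 is removed once its use count drops to zero.  */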
3122 if (next && simplejump_or_return_p (next)
3123 && any_condjump_p (insn)
3124 && target_label
3125 && next_active_insn (target_label) == next_active_insn (next)
3126 && no_labels_between_p (insn, next))
3128 rtx label = JUMP_LABEL (next);
3130 /* Be careful how we do this to avoid deleting code or
3131 labels that are momentarily dead. See similar optimization
3132 in jump.c.
3134 We also need to ensure we properly handle the case when
3135 invert_jump fails. */
3137 ++LABEL_NUSES (target_label);
3138 if (!ANY_RETURN_P (label))
3139 ++LABEL_NUSES (label);
3141 if (invert_jump (insn, label, 1))
3143 delete_related_insns (next);
3144 next = insn;
3147 if (!ANY_RETURN_P (label))
3148 --LABEL_NUSES (label);
3150 if (--LABEL_NUSES (target_label) == 0)
3151 delete_related_insns (target_label);
3153 continue;
3157 /* If this is an unconditional jump and the previous insn is a
3158 conditional jump, try reversing the condition of the previous
3159 insn and swapping our targets. The next pass might be able to
3160 fill the slots.
3162 Don't do this if we expect the conditional branch to be true, because
3163 we would then be making the more common case longer. */
3165 if (simplejump_or_return_p (insn)
3166 && (other = prev_active_insn (insn)) != 0
3167 && any_condjump_p (other)
3168 && no_labels_between_p (other, insn)
3169 && 0 > mostly_true_jump (other))
3171 rtx other_target = JUMP_LABEL (other);
3172 target_label = JUMP_LABEL (insn);
3174 if (invert_jump (other, target_label, 0))
3175 reorg_redirect_jump (insn, other_target);
3178 /* Now look only at cases where we have a filled delay slot. */
3179 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3180 continue;
3182 pat = PATTERN (insn);
3183 delay_insn = XVECEXP (pat, 0, 0);
3185 /* See if the first insn in the delay slot is redundant with some
3186 previous insn. Remove it from the delay slot if so; then set up
3187 to reprocess this insn. */
3188 if (redundant_insn (XVECEXP (pat, 0, 1), delay_insn, 0))
3190 delete_from_delay_slot (XVECEXP (pat, 0, 1));
3191 next = prev_active_insn (next);
3192 continue;
3195 /* See if we have a RETURN insn with a filled delay slot followed
3196 by a RETURN insn with an unfilled delay slot. If so, we can delete
3197 the first RETURN (but not its delay insn). This gives the same
3198 effect in fewer instructions.
3200 Only do so if optimizing for size since this results in slower, but
3201 smaller code. */
3202 if (optimize_function_for_size_p (cfun)
3203 && ANY_RETURN_P (PATTERN (delay_insn))
3204 && next
3205 && JUMP_P (next)
3206 && PATTERN (next) == PATTERN (delay_insn))
3208 rtx after;
3209 int i;
3211 /* Delete the RETURN and just execute the delay list insns.
3213 We do this by deleting the INSN containing the SEQUENCE, then
3214 re-emitting the insns separately, and then deleting the RETURN.
3215 This allows the count of the jump target to be properly
3216 decremented.
3218 Note that we need to change the INSN_UID of the re-emitted insns
3219 since it is used to hash the insns for mark_target_live_regs and
3220 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3222 Clear the from target bit, since these insns are no longer
3223 in delay slots. */
3224 for (i = 0; i < XVECLEN (pat, 0); i++)
3225 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3227 trial = PREV_INSN (insn);
3228 delete_related_insns (insn);
3229 gcc_assert (GET_CODE (pat) == SEQUENCE);
3230 add_insn_after (delay_insn, trial, NULL);
3231 after = delay_insn;
3232 for (i = 1; i < XVECLEN (pat, 0); i++)
3233 after = emit_copy_of_insn_after (XVECEXP (pat, 0, i), after);
3234 delete_scheduled_jump (delay_insn);
3235 continue;
3238 /* Now look only at the cases where we have a filled JUMP_INSN. */
3239 if (!JUMP_P (delay_insn)
3240 || !(condjump_p (delay_insn) || condjump_in_parallel_p (delay_insn)))
3241 continue;
3243 target_label = JUMP_LABEL (delay_insn);
3244 if (target_label && ANY_RETURN_P (target_label))
3245 continue;
3247 /* If this jump goes to another unconditional jump, thread it, but
3248 don't convert a jump into a RETURN here. */
3249 trial = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
3250 &crossing));
3251 if (ANY_RETURN_P (trial))
3252 trial = find_end_label (trial);
3254 if (trial && trial != target_label
3255 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3257 reorg_redirect_jump (delay_insn, trial);
3258 target_label = trial;
3259 if (crossing)
3260 set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
3263 /* If the first insn at TARGET_LABEL is redundant with a previous
3264 insn, redirect the jump to the following insn and process again.
3265 We use next_real_insn instead of next_active_insn so we
3266 don't skip USE-markers, or we'll end up with incorrect
3267 liveness info. */
3268 trial = next_real_insn (target_label);
3269 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3270 && redundant_insn (trial, insn, 0)
3271 && ! can_throw_internal (trial))
3273 /* Figure out where to emit the special USE insn so we don't
3274 later incorrectly compute register live/death info. */
3275 rtx tmp = next_active_insn (trial);
3276 if (tmp == 0)
3277 tmp = find_end_label (simple_return_rtx);
3279 if (tmp)
3281 /* Insert the special USE insn and update dataflow info. */
3282 update_block (trial, tmp);
3284 /* Now emit a label before the special USE insn, and
3285 redirect our jump to the new label. */
3286 target_label = get_label_before (PREV_INSN (tmp));
3287 reorg_redirect_jump (delay_insn, target_label);
3288 next = insn;
3289 continue;
3293 /* Similarly, if it is an unconditional jump with one insn in its
3294 delay list and that insn is redundant, thread the jump. */
3295 if (trial && GET_CODE (PATTERN (trial)) == SEQUENCE
3296 && XVECLEN (PATTERN (trial), 0) == 2
3297 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
3298 && simplejump_or_return_p (XVECEXP (PATTERN (trial), 0, 0))
3299 && redundant_insn (XVECEXP (PATTERN (trial), 0, 1), insn, 0))
3301 target_label = JUMP_LABEL (XVECEXP (PATTERN (trial), 0, 0));
3302 if (ANY_RETURN_P (target_label))
3303 target_label = find_end_label (target_label);
3305 if (target_label
3306 && redirect_with_delay_slots_safe_p (delay_insn, target_label,
3307 insn))
3309 reorg_redirect_jump (delay_insn, target_label);
3310 next = insn;
3311 continue;
3315 /* See if we have a simple (conditional) jump that is useless. */
3316 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3317 && ! condjump_in_parallel_p (delay_insn)
3318 && prev_active_insn (target_label) == insn
3319 && ! BARRIER_P (prev_nonnote_insn (target_label))
3320 #ifdef HAVE_cc0
3321 /* If the last insn in the delay slot sets CC0 for some insn,
3322 various code assumes that it is in a delay slot. We could
3323 put it back where it belonged and delete the register notes,
3324 but it doesn't seem worthwhile in this uncommon case. */
3325 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3326 REG_CC_USER, NULL_RTX)
3327 #endif
3330 rtx after;
3331 int i;
3333 /* All this insn does is execute its delay list and jump to the
3334 following insn. So delete the jump and just execute the delay
3335 list insns.
3337 We do this by deleting the INSN containing the SEQUENCE, then
3338 re-emitting the insns separately, and then deleting the jump.
3339 This allows the count of the jump target to be properly
3340 decremented.
3342 Note that we need to change the INSN_UID of the re-emitted insns
3343 since it is used to hash the insns for mark_target_live_regs and
3344 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3346 Clear the from target bit, since these insns are no longer
3347 in delay slots. */
3348 for (i = 0; i < XVECLEN (pat, 0); i++)
3349 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3351 trial = PREV_INSN (insn);
3352 delete_related_insns (insn);
3353 gcc_assert (GET_CODE (pat) == SEQUENCE);
3354 add_insn_after (delay_insn, trial, NULL);
3355 after = delay_insn;
3356 for (i = 1; i < XVECLEN (pat, 0); i++)
3357 after = emit_copy_of_insn_after (XVECEXP (pat, 0, i), after);
3358 delete_scheduled_jump (delay_insn);
3359 continue;
3362 /* See if this is an unconditional jump around a single insn which is
3363 identical to the one in its delay slot. In this case, we can just
3364 delete the branch and the insn in its delay slot. */
3365 if (next && NONJUMP_INSN_P (next)
3366 && label_before_next_insn (next, insn) == target_label
3367 && simplejump_p (insn)
3368 && XVECLEN (pat, 0) == 2
3369 && rtx_equal_p (PATTERN (next), PATTERN (XVECEXP (pat, 0, 1))))
3371 delete_related_insns (insn);
3372 continue;
3375 /* See if this jump (with its delay slots) conditionally branches
3376 around an unconditional jump (without delay slots). If so, invert
3377 this jump and point it to the target of the second jump. We cannot
3378 do this for annulled jumps, though. Again, don't convert a jump to
3379 a RETURN here. */
3380 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3381 && any_condjump_p (delay_insn)
3382 && next && simplejump_or_return_p (next)
3383 && next_active_insn (target_label) == next_active_insn (next)
3384 && no_labels_between_p (insn, next))
3386 rtx label = JUMP_LABEL (next);
3387 rtx old_label = JUMP_LABEL (delay_insn);
3389 if (ANY_RETURN_P (label))
3390 label = find_end_label (label);
3392 /* find_end_label can generate a new label. Check this first. */
3393 if (label
3394 && no_labels_between_p (insn, next)
3395 && redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3397 /* Be careful how we do this to avoid deleting code or labels
3398 that are momentarily dead. See similar optimization in
3399 jump.c */
3400 if (old_label)
3401 ++LABEL_NUSES (old_label);
3403 if (invert_jump (delay_insn, label, 1))
3405 int i;
3407 /* Must update the INSN_FROM_TARGET_P bits now that
3408 the branch is reversed, so that mark_target_live_regs
3409 will handle the delay slot insn correctly. */
3410 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3412 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3413 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3416 delete_related_insns (next);
3417 next = insn;
3420 if (old_label && --LABEL_NUSES (old_label) == 0)
3421 delete_related_insns (old_label);
3422 continue;
3426 /* If we own the thread opposite the way this insn branches, see if we
3427 can merge its delay slots with following insns. */
3428 if (INSN_FROM_TARGET_P (XVECEXP (pat, 0, 1))
3429 && own_thread_p (NEXT_INSN (insn), 0, 1))
3430 try_merge_delay_insns (insn, next);
3431 else if (! INSN_FROM_TARGET_P (XVECEXP (pat, 0, 1))
3432 && own_thread_p (target_label, target_label, 0))
3433 try_merge_delay_insns (insn, next_active_insn (target_label));
3435 /* If we get here, we haven't deleted INSN. But we may have deleted
3436 NEXT, so recompute it. */
3437 next = next_active_insn (insn);
3442 /* Look for filled jumps to the end of function label. We can try to convert
3443 them into RETURN insns if the insns in the delay slot are valid for the
3444 RETURN as well. */
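/* Note (added for clarity): only filled jumps whose existing delay insns
   remain valid for a RETURN are converted below; the rest are redirected to
   REAL_RETURN_LABEL (or its simple_return counterpart) and kept as branches,
   which is usually cheaper than discarding their filled slots.  */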
3446 static void
3447 make_return_insns (rtx first)
3449 rtx insn, jump_insn, pat;
3450 rtx real_return_label = function_return_label;
3451 rtx real_simple_return_label = function_simple_return_label;
3452 int slots, i;
3454 /* See if there is a RETURN insn in the function other than the one we
3455 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3456 into a RETURN to jump to it. */
3457 for (insn = first; insn; insn = NEXT_INSN (insn))
3458 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3460 rtx t = get_label_before (insn);
3461 if (PATTERN (insn) == ret_rtx)
3462 real_return_label = t;
3463 else
3464 real_simple_return_label = t;
3465 break;
3468 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3469 was equal to END_OF_FUNCTION_LABEL. */
3470 if (real_return_label)
3471 LABEL_NUSES (real_return_label)++;
3472 if (real_simple_return_label)
3473 LABEL_NUSES (real_simple_return_label)++;
3475 /* Clear the list of insns to fill so we can use it. */
3476 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3478 for (insn = first; insn; insn = NEXT_INSN (insn))
3480 int flags;
3481 rtx kind, real_label;
3483 /* Only look at filled JUMP_INSNs that go to the end of function
3484 label. */
3485 if (!NONJUMP_INSN_P (insn)
3486 || GET_CODE (PATTERN (insn)) != SEQUENCE
3487 || !jump_to_label_p (XVECEXP (PATTERN (insn), 0, 0)))
3488 continue;
3490 if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) == function_return_label)
3492 kind = ret_rtx;
3493 real_label = real_return_label;
3495 else if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0))
3496 == function_simple_return_label)
3498 kind = simple_return_rtx;
3499 real_label = real_simple_return_label;
3501 else
3502 continue;
3504 pat = PATTERN (insn);
3505 jump_insn = XVECEXP (pat, 0, 0);
3507 /* If we can't make the jump into a RETURN, try to redirect it to the best
3508 RETURN and go on to the next insn. */
3509 if (!reorg_redirect_jump (jump_insn, kind))
3511 /* Make sure redirecting the jump will not invalidate the delay
3512 slot insns. */
3513 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3514 reorg_redirect_jump (jump_insn, real_label);
3515 continue;
3518 See if this RETURN can accept the insns currently in its delay slot.
3519 It can if it has more or an equal number of slots and the contents
3520 of each is valid. */

      flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
      slots = num_delay_slots (jump_insn);
      if (slots >= XVECLEN (pat, 0) - 1)
        {
          for (i = 1; i < XVECLEN (pat, 0); i++)
            if (! (
#ifdef ANNUL_IFFALSE_SLOTS
                   (INSN_ANNULLED_BRANCH_P (jump_insn)
                    && INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
                   ? eligible_for_annul_false (jump_insn, i - 1,
                                               XVECEXP (pat, 0, i), flags) :
#endif
#ifdef ANNUL_IFTRUE_SLOTS
                   (INSN_ANNULLED_BRANCH_P (jump_insn)
                    && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
                   ? eligible_for_annul_true (jump_insn, i - 1,
                                              XVECEXP (pat, 0, i), flags) :
#endif
                   eligible_for_delay (jump_insn, i - 1,
                                       XVECEXP (pat, 0, i), flags)))
              break;
        }
      else
        i = 0;

      if (i == XVECLEN (pat, 0))
        continue;

      /* We have to do something with this insn.  If it is an unconditional
         RETURN, delete the SEQUENCE and output the individual insns,
         followed by the RETURN.  Then set things up so we try to find
         insns for its delay slots, if it needs some.  */
      if (ANY_RETURN_P (PATTERN (jump_insn)))
        {
          rtx prev = PREV_INSN (insn);

          delete_related_insns (insn);
          for (i = 1; i < XVECLEN (pat, 0); i++)
            prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);

          insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
          emit_barrier_after (insn);

          if (slots)
            obstack_ptr_grow (&unfilled_slots_obstack, insn);
        }
      else
        /* It is probably more efficient to keep this with its current
           delay slot as a branch to a RETURN.  */
        reorg_redirect_jump (jump_insn, real_label);
    }

  /* Now delete REAL_RETURN_LABEL if we never used it.  Then try to fill any
     new delay slots we have created.  */
  if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
    delete_related_insns (real_return_label);
  if (real_simple_return_label != NULL_RTX
      && --LABEL_NUSES (real_simple_return_label) == 0)
    delete_related_insns (real_simple_return_label);

  fill_simple_delay_slots (1);
  fill_simple_delay_slots (0);
}

/* Try to find insns to place in delay slots.  */

void
dbr_schedule (rtx first)
{
  rtx insn, next, epilogue_insn = 0;
  int i;
  bool need_return_insns;

  /* If the current function has no insns other than the prologue and
     epilogue, then do not try to fill any delay slots.  */
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
    return;

  /* Find the highest INSN_UID and allocate and initialize our map from
     INSN_UID's to position in code.  */
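  /* uid_to_ruid[] ends up holding, for each insn, its index in textual
     order (a "relative uid"), so later code can compare the positions of
     two insns with a plain integer comparison, e.g. to tell forward from
     backward branches.  */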
  for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_UID (insn) > max_uid)
        max_uid = INSN_UID (insn);
      if (NOTE_P (insn)
          && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
        epilogue_insn = insn;
    }

  uid_to_ruid = XNEWVEC (int, max_uid + 1);
  for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
    uid_to_ruid[INSN_UID (insn)] = i;

  /* Initialize the list of insns that need filling.  */
  if (unfilled_firstobj == 0)
    {
      gcc_obstack_init (&unfilled_slots_obstack);
      unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
    }

  for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
    {
      rtx target;

      /* Skip vector tables.  We can't get attributes for them.  */
      if (JUMP_TABLE_DATA_P (insn))
        continue;

      if (JUMP_P (insn))
        INSN_ANNULLED_BRANCH_P (insn) = 0;
      INSN_FROM_TARGET_P (insn) = 0;

      if (num_delay_slots (insn) > 0)
        obstack_ptr_grow (&unfilled_slots_obstack, insn);

      /* Ensure all jumps go to the last of a set of consecutive labels.  */
      if (JUMP_P (insn)
          && (condjump_p (insn) || condjump_in_parallel_p (insn))
          && !ANY_RETURN_P (JUMP_LABEL (insn))
          && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
              != JUMP_LABEL (insn)))
        redirect_jump (insn, target, 1);
    }

  init_resource_info (epilogue_insn);

  /* Show we haven't computed an end-of-function label yet.  */
  function_return_label = function_simple_return_label = NULL_RTX;

  /* Initialize the statistics for this function.  */
  memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
  memset (num_filled_delays, 0, sizeof num_filled_delays);

  /* Now do the delay slot filling.  Try everything twice in case earlier
     changes make more slots fillable.  */

  for (reorg_pass_number = 0;
       reorg_pass_number < MAX_REORG_PASSES;
       reorg_pass_number++)
    {
      fill_simple_delay_slots (1);
      fill_simple_delay_slots (0);
      fill_eager_delay_slots ();
      relax_delay_slots (first);
    }

  /* If we made an end of function label, indicate that it is now
     safe to delete it by undoing our prior adjustment to LABEL_NUSES.
     If it is now unused, delete it.  */
  if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
    delete_related_insns (function_return_label);
  if (function_simple_return_label
      && --LABEL_NUSES (function_simple_return_label) == 0)
    delete_related_insns (function_simple_return_label);

  need_return_insns = false;
#ifdef HAVE_return
  need_return_insns |= HAVE_return && function_return_label != 0;
#endif
#ifdef HAVE_simple_return
  need_return_insns |= HAVE_simple_return && function_simple_return_label != 0;
#endif
  if (need_return_insns)
    make_return_insns (first);

  /* Delete any USE insns made by update_block; subsequent passes don't need
     them or know how to deal with them.  */
  for (insn = first; insn; insn = next)
    {
      next = NEXT_INSN (insn);

      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
          && INSN_P (XEXP (PATTERN (insn), 0)))
        next = delete_related_insns (insn);
    }

  obstack_free (&unfilled_slots_obstack, unfilled_firstobj);

  /* It is not clear why the line below is needed, but it does seem to be.  */
  unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
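
  /* The histogram dump below is only produced when this pass's dump is
     requested (e.g. via -fdump-rtl-dbr, which makes dump_file non-null).
     Each "N got M delays" entry means N insns ended up with M filled
     delay slots, so a totals line might look roughly like
         ;; Reorg totals: 20 got 0 delays, 7 got 1 delays  */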
  if (dump_file)
    {
      int i, j, need_comma;
      int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
      int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];

      for (reorg_pass_number = 0;
           reorg_pass_number < MAX_REORG_PASSES;
           reorg_pass_number++)
        {
          fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
          for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
            {
              need_comma = 0;
              fprintf (dump_file, ";; Reorg function #%d\n", i);

              fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
                       num_insns_needing_delays[i][reorg_pass_number]);

              for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
                if (num_filled_delays[i][j][reorg_pass_number])
                  {
                    if (need_comma)
                      fprintf (dump_file, ", ");
                    need_comma = 1;
                    fprintf (dump_file, "%d got %d delays",
                             num_filled_delays[i][j][reorg_pass_number], j);
                  }
              fprintf (dump_file, "\n");
            }
        }
      memset (total_delay_slots, 0, sizeof total_delay_slots);
      memset (total_annul_slots, 0, sizeof total_annul_slots);
      for (insn = first; insn; insn = NEXT_INSN (insn))
        {
          if (! INSN_DELETED_P (insn)
              && NONJUMP_INSN_P (insn)
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER)
            {
              if (GET_CODE (PATTERN (insn)) == SEQUENCE)
                {
                  rtx control;
                  j = XVECLEN (PATTERN (insn), 0) - 1;
                  if (j > MAX_DELAY_HISTOGRAM)
                    j = MAX_DELAY_HISTOGRAM;
                  control = XVECEXP (PATTERN (insn), 0, 0);
                  if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
                    total_annul_slots[j]++;
                  else
                    total_delay_slots[j]++;
                }
              else if (num_delay_slots (insn) > 0)
                total_delay_slots[0]++;
            }
        }
      fprintf (dump_file, ";; Reorg totals: ");
      need_comma = 0;
      for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
        {
          if (total_delay_slots[j])
            {
              if (need_comma)
                fprintf (dump_file, ", ");
              need_comma = 1;
              fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
            }
        }
      fprintf (dump_file, "\n");
#if defined (ANNUL_IFTRUE_SLOTS) || defined (ANNUL_IFFALSE_SLOTS)
      fprintf (dump_file, ";; Reorg annuls: ");
      need_comma = 0;
      for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
        {
          if (total_annul_slots[j])
            {
              if (need_comma)
                fprintf (dump_file, ", ");
              need_comma = 1;
              fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
            }
        }
      fprintf (dump_file, "\n");
#endif
      fprintf (dump_file, "\n");
    }

  free_resource_info ();
  free (uid_to_ruid);
  crtl->dbr_scheduled_p = true;
}
#endif /* DELAY_SLOTS */

static bool
gate_handle_delay_slots (void)
{
#ifdef DELAY_SLOTS
  /* At -O0 dataflow info isn't updated after RA.  */
  return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
#else
  return 0;
#endif
}

/* Run delay slot optimization.  */
static unsigned int
rest_of_handle_delay_slots (void)
{
#ifdef DELAY_SLOTS
  dbr_schedule (get_insns ());
#endif
  return 0;
}

struct rtl_opt_pass pass_delay_slots =
{
 {
  RTL_PASS,
  "dbr",                                /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_handle_delay_slots,              /* gate */
  rest_of_handle_delay_slots,           /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_DBR_SCHED,                         /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
 }
};
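
/* This descriptor is never invoked directly from this file; like the other
   RTL passes, it is picked up by the pass manager from the compiler's
   central pass list and run at its scheduled point after register
   allocation.  */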

/* Machine dependent reorg pass.  */
static bool
gate_handle_machine_reorg (void)
{
  return targetm.machine_dependent_reorg != 0;
}

static unsigned int
rest_of_handle_machine_reorg (void)
{
  targetm.machine_dependent_reorg ();
  return 0;
}

struct rtl_opt_pass pass_machine_reorg =
{
 {
  RTL_PASS,
  "mach",                               /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_handle_machine_reorg,            /* gate */
  rest_of_handle_machine_reorg,         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_MACH_DEP,                          /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
 }
};