gcc/reorg.c
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
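       For illustration only (hypothetical MIPS-style assembly, not taken
       from any machine description), filling the slot amounts to moving an
       insn that is independent of the branch condition to just after the
       branch:

           addu  $t0,$t1,$t2               beq   $a0,$zero,L2
           beq   $a0,$zero,L2      -->      addu  $t0,$t1,$t2   # delay slot
           nop                            L2: ...
         L2: ...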
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
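       For example (hypothetical SPARC-style assembly; the exact syntax is
       only an assumption used for illustration), the first insn of the
       taken target can be copied into an annulled slot:

           cmp   %o0, 0                    cmp   %o0, 0
           be    L1                -->     be,a  L1
           nop                              ld   [%o1], %o2    ! copied from L1
           ...                             ...
         L1: ld   [%o1], %o2             L1: ld   [%o1], %o2

       The `,a' suffix annuls the slot insn when the branch is not taken,
       so the copied load has no effect on the fall-through path.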
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
 81 only way to reach that target (an illustration follows this list of techniques).
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
 85 a conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
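       As an illustration of technique (2) above (hypothetical assembly,
       annulling syntax assumed), when the slot is filled from the branch
       target and that target is also reachable from elsewhere, the stolen
       insn must stay at the target, so the code grows:

           beq   ...,L1                    beq,a ...,L1
           nop                              insn1             ; copied from L1
           ...                             ...
         L1: insn1                       L1: insn1            ; still needed for
             insn2                           insn2            ; other paths to L1

       On a machine without annulled jumps the same filling is possible only
       when insn1 is also safe to execute on the fall-through path.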
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
 95 correspondence between the insn that sets CC0 and the insn that uses it. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "tm.h"
107 #include "diagnostic-core.h"
108 #include "rtl.h"
109 #include "tm_p.h"
110 #include "symtab.h"
111 #include "expr.h"
112 #include "hashtab.h"
113 #include "hash-set.h"
114 #include "vec.h"
115 #include "machmode.h"
116 #include "hard-reg-set.h"
117 #include "input.h"
118 #include "function.h"
119 #include "insn-config.h"
120 #include "conditions.h"
121 #include "predict.h"
122 #include "dominance.h"
123 #include "cfg.h"
124 #include "basic-block.h"
125 #include "regs.h"
126 #include "recog.h"
127 #include "flags.h"
128 #include "obstack.h"
129 #include "insn-attr.h"
130 #include "resource.h"
131 #include "except.h"
132 #include "params.h"
133 #include "target.h"
134 #include "tree-pass.h"
135 #include "emit-rtl.h"
137 #ifdef DELAY_SLOTS
139 #ifndef ANNUL_IFTRUE_SLOTS
140 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
141 #endif
142 #ifndef ANNUL_IFFALSE_SLOTS
143 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
144 #endif
147 /* First, some functions that were used before GCC got a control flow graph.
148 These functions are now only used here in reorg.c, and have therefore
149 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
151 /* Return the last label to mark the same position as LABEL. Return LABEL
152 itself if it is null or any return rtx. */
154 static rtx
155 skip_consecutive_labels (rtx label_or_return)
157 rtx_insn *insn;
159 if (label_or_return && ANY_RETURN_P (label_or_return))
160 return label_or_return;
162 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
164 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
165 if (LABEL_P (insn))
166 label = insn;
168 return label;
171 #ifdef HAVE_cc0
172 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
173 and REG_CC_USER notes so we can find it. */
175 static void
176 link_cc0_insns (rtx insn)
178 rtx user = next_nonnote_insn (insn);
180 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
181 user = XVECEXP (PATTERN (user), 0, 0);
183 add_reg_note (user, REG_CC_SETTER, insn);
184 add_reg_note (insn, REG_CC_USER, user);
186 #endif
188 /* Insns which have delay slots that have not yet been filled. */
190 static struct obstack unfilled_slots_obstack;
191 static rtx *unfilled_firstobj;
193 /* Define macros to refer to the first and last slot containing unfilled
194 insns. These are used because the list may move and its address
195 should be recomputed at each use. */
197 #define unfilled_slots_base \
198 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
200 #define unfilled_slots_next \
201 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
203 /* Points to the label before the end of the function, or before a
204 return insn. */
205 static rtx_code_label *function_return_label;
206 /* Likewise for a simple_return. */
207 static rtx_code_label *function_simple_return_label;
209 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
210 not always monotonically increase. */
211 static int *uid_to_ruid;
213 /* Highest valid index in `uid_to_ruid'. */
214 static int max_uid;
216 static int stop_search_p (rtx, int);
217 static int resource_conflicts_p (struct resources *, struct resources *);
218 static int insn_references_resource_p (rtx, struct resources *, bool);
219 static int insn_sets_resource_p (rtx, struct resources *, bool);
220 static rtx_code_label *find_end_label (rtx);
221 static rtx_insn *emit_delay_sequence (rtx_insn *, rtx_insn_list *, int);
222 static rtx_insn_list *add_to_delay_list (rtx_insn *, rtx_insn_list *);
223 static rtx_insn *delete_from_delay_slot (rtx_insn *);
224 static void delete_scheduled_jump (rtx_insn *);
225 static void note_delay_statistics (int, int);
226 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
227 static rtx_insn_list *optimize_skip (rtx_insn *);
228 #endif
229 static int get_jump_flags (const rtx_insn *, rtx);
230 static int mostly_true_jump (rtx);
231 static rtx get_branch_condition (const rtx_insn *, rtx);
232 static int condition_dominates_p (rtx, const rtx_insn *);
233 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
234 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx, rtx_insn_list *);
235 static int check_annul_list_true_false (int, rtx);
236 static rtx_insn_list *steal_delay_list_from_target (rtx_insn *, rtx,
237 rtx_sequence *,
238 rtx_insn_list *,
239 struct resources *,
240 struct resources *,
241 struct resources *,
242 int, int *, int *,
243 rtx *);
244 static rtx_insn_list *steal_delay_list_from_fallthrough (rtx_insn *, rtx,
245 rtx_sequence *,
246 rtx_insn_list *,
247 struct resources *,
248 struct resources *,
249 struct resources *,
250 int, int *, int *);
251 static void try_merge_delay_insns (rtx, rtx_insn *);
252 static rtx redundant_insn (rtx, rtx_insn *, rtx);
253 static int own_thread_p (rtx, rtx, int);
254 static void update_block (rtx_insn *, rtx);
255 static int reorg_redirect_jump (rtx_insn *, rtx);
256 static void update_reg_dead_notes (rtx, rtx);
257 static void fix_reg_dead_note (rtx, rtx);
258 static void update_reg_unused_notes (rtx, rtx);
259 static void fill_simple_delay_slots (int);
260 static rtx_insn_list *fill_slots_from_thread (rtx_insn *, rtx, rtx, rtx,
261 int, int, int, int,
262 int *, rtx_insn_list *);
263 static void fill_eager_delay_slots (void);
264 static void relax_delay_slots (rtx_insn *);
265 static void make_return_insns (rtx_insn *);
267 /* A wrapper around next_active_insn which takes care to return ret_rtx
268 unchanged. */
270 static rtx
271 first_active_target_insn (rtx insn)
273 if (ANY_RETURN_P (insn))
274 return insn;
275 return next_active_insn (as_a <rtx_insn *> (insn));
278 /* Return true iff INSN is a simplejump, or any kind of return insn. */
280 static bool
281 simplejump_or_return_p (rtx insn)
283 return (JUMP_P (insn)
284 && (simplejump_p (as_a <rtx_insn *> (insn))
285 || ANY_RETURN_P (PATTERN (insn))));
 288 /* Return TRUE if this insn should stop the search for insns to fill delay
289 slots. LABELS_P indicates that labels should terminate the search.
290 In all cases, jumps terminate the search. */
292 static int
293 stop_search_p (rtx insn, int labels_p)
295 if (insn == 0)
296 return 1;
298 /* If the insn can throw an exception that is caught within the function,
299 it may effectively perform a jump from the viewpoint of the function.
300 Therefore act like for a jump. */
301 if (can_throw_internal (insn))
302 return 1;
304 switch (GET_CODE (insn))
306 case NOTE:
307 case CALL_INSN:
308 return 0;
310 case CODE_LABEL:
311 return labels_p;
313 case JUMP_INSN:
314 case BARRIER:
315 return 1;
317 case INSN:
318 /* OK unless it contains a delay slot or is an `asm' insn of some type.
319 We don't know anything about these. */
320 return (GET_CODE (PATTERN (insn)) == SEQUENCE
321 || GET_CODE (PATTERN (insn)) == ASM_INPUT
322 || asm_noperands (PATTERN (insn)) >= 0);
324 default:
325 gcc_unreachable ();
329 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
330 resource set contains a volatile memory reference. Otherwise, return FALSE. */
332 static int
333 resource_conflicts_p (struct resources *res1, struct resources *res2)
335 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
336 || res1->volatil || res2->volatil)
337 return 1;
339 return hard_reg_set_intersect_p (res1->regs, res2->regs);
342 /* Return TRUE if any resource marked in RES, a `struct resources', is
343 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
344 routine is using those resources.
346 We compute this by computing all the resources referenced by INSN and
347 seeing if this conflicts with RES. It might be faster to directly check
348 ourselves, and this is the way it used to work, but it means duplicating
349 a large block of complex code. */
351 static int
352 insn_references_resource_p (rtx insn, struct resources *res,
353 bool include_delayed_effects)
355 struct resources insn_res;
357 CLEAR_RESOURCE (&insn_res);
358 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
359 return resource_conflicts_p (&insn_res, res);
362 /* Return TRUE if INSN modifies resources that are marked in RES.
363 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
364 included. CC0 is only modified if it is explicitly set; see comments
365 in front of mark_set_resources for details. */
367 static int
368 insn_sets_resource_p (rtx insn, struct resources *res,
369 bool include_delayed_effects)
371 struct resources insn_sets;
373 CLEAR_RESOURCE (&insn_sets);
374 mark_set_resources (insn, &insn_sets, 0,
375 (include_delayed_effects
376 ? MARK_SRC_DEST_CALL
377 : MARK_SRC_DEST));
378 return resource_conflicts_p (&insn_sets, res);
381 /* Find a label at the end of the function or before a RETURN. If there
 382 is none, try to make one. If that fails, return 0.
384 The property of such a label is that it is placed just before the
385 epilogue or a bare RETURN insn, so that another bare RETURN can be
386 turned into a jump to the label unconditionally. In particular, the
387 label cannot be placed before a RETURN insn with a filled delay slot.
389 ??? There may be a problem with the current implementation. Suppose
390 we start with a bare RETURN insn and call find_end_label. It may set
391 function_return_label just before the RETURN. Suppose the machinery
392 is able to fill the delay slot of the RETURN insn afterwards. Then
393 function_return_label is no longer valid according to the property
394 described above and find_end_label will still return it unmodified.
395 Note that this is probably mitigated by the following observation:
396 once function_return_label is made, it is very likely the target of
397 a jump, so filling the delay slot of the RETURN will be much more
398 difficult.
399 KIND is either simple_return_rtx or ret_rtx, indicating which type of
400 return we're looking for. */
402 static rtx_code_label *
403 find_end_label (rtx kind)
405 rtx_insn *insn;
406 rtx_code_label **plabel;
408 if (kind == ret_rtx)
409 plabel = &function_return_label;
410 else
412 gcc_assert (kind == simple_return_rtx);
413 plabel = &function_simple_return_label;
416 /* If we found one previously, return it. */
417 if (*plabel)
418 return *plabel;
420 /* Otherwise, see if there is a label at the end of the function. If there
421 is, it must be that RETURN insns aren't needed, so that is our return
422 label and we don't have to do anything else. */
424 insn = get_last_insn ();
425 while (NOTE_P (insn)
426 || (NONJUMP_INSN_P (insn)
427 && (GET_CODE (PATTERN (insn)) == USE
428 || GET_CODE (PATTERN (insn)) == CLOBBER)))
429 insn = PREV_INSN (insn);
431 /* When a target threads its epilogue we might already have a
432 suitable return insn. If so put a label before it for the
433 function_return_label. */
434 if (BARRIER_P (insn)
435 && JUMP_P (PREV_INSN (insn))
436 && PATTERN (PREV_INSN (insn)) == kind)
438 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
439 rtx_code_label *label = gen_label_rtx ();
440 LABEL_NUSES (label) = 0;
442 /* Put the label before any USE insns that may precede the RETURN
443 insn. */
444 while (GET_CODE (temp) == USE)
445 temp = PREV_INSN (temp);
447 emit_label_after (label, temp);
448 *plabel = label;
451 else if (LABEL_P (insn))
452 *plabel = as_a <rtx_code_label *> (insn);
453 else
455 rtx_code_label *label = gen_label_rtx ();
456 LABEL_NUSES (label) = 0;
457 /* If the basic block reorder pass moves the return insn to
458 some other place try to locate it again and put our
459 function_return_label there. */
460 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
461 insn = PREV_INSN (insn);
462 if (insn)
464 insn = PREV_INSN (insn);
466 /* Put the label before any USE insns that may precede the
467 RETURN insn. */
468 while (GET_CODE (insn) == USE)
469 insn = PREV_INSN (insn);
471 emit_label_after (label, insn);
473 else
475 #ifdef HAVE_epilogue
476 if (HAVE_epilogue
477 #ifdef HAVE_return
478 && ! HAVE_return
479 #endif
481 /* The RETURN insn has its delay slot filled so we cannot
482 emit the label just before it. Since we already have
483 an epilogue and cannot emit a new RETURN, we cannot
484 emit the label at all. */
485 return NULL;
486 #endif /* HAVE_epilogue */
488 /* Otherwise, make a new label and emit a RETURN and BARRIER,
489 if needed. */
490 emit_label (label);
491 #ifdef HAVE_return
492 if (HAVE_return)
494 /* The return we make may have delay slots too. */
495 rtx pat = gen_return ();
496 rtx_insn *insn = emit_jump_insn (pat);
497 set_return_jump_label (insn);
498 emit_barrier ();
499 if (num_delay_slots (insn) > 0)
500 obstack_ptr_grow (&unfilled_slots_obstack, insn);
502 #endif
504 *plabel = label;
507 /* Show one additional use for this label so it won't go away until
508 we are done. */
509 ++LABEL_NUSES (*plabel);
511 return *plabel;
514 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
515 the pattern of INSN with the SEQUENCE.
517 Returns the insn containing the SEQUENCE that replaces INSN. */
519 static rtx_insn *
520 emit_delay_sequence (rtx_insn *insn, rtx_insn_list *list, int length)
522 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
523 rtvec seqv = rtvec_alloc (length + 1);
524 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
525 rtx_insn *seq_insn = make_insn_raw (seq);
527 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
528 not have a location, but one of the delayed insns does, we pick up a
529 location from there later. */
530 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
532 /* Unlink INSN from the insn chain, so that we can put it into
533 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
534 rtx after = PREV_INSN (insn);
535 remove_insn (insn);
536 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
538 /* Build our SEQUENCE and rebuild the insn chain. */
539 int i = 1;
540 start_sequence ();
541 XVECEXP (seq, 0, 0) = emit_insn (insn);
542 for (rtx_insn_list *li = list; li; li = li->next (), i++)
544 rtx_insn *tem = li->insn ();
545 rtx note, next;
547 /* Show that this copy of the insn isn't deleted. */
548 tem->set_undeleted ();
550 /* Unlink insn from its original place, and re-emit it into
551 the sequence. */
552 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
553 XVECEXP (seq, 0, i) = emit_insn (tem);
 555 /* The SPARC assembler, for instance, emits a warning when debug info
 556 is output into the delay slot. */
557 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
558 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
559 INSN_LOCATION (tem) = 0;
561 for (note = REG_NOTES (tem); note; note = next)
563 next = XEXP (note, 1);
564 switch (REG_NOTE_KIND (note))
566 case REG_DEAD:
567 /* Remove any REG_DEAD notes because we can't rely on them now
568 that the insn has been moved. */
569 remove_note (tem, note);
570 break;
572 case REG_LABEL_OPERAND:
573 case REG_LABEL_TARGET:
574 /* Keep the label reference count up to date. */
575 if (LABEL_P (XEXP (note, 0)))
576 LABEL_NUSES (XEXP (note, 0)) ++;
577 break;
579 default:
580 break;
584 end_sequence ();
585 gcc_assert (i == length + 1);
587 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
588 add_insn_after (seq_insn, after, NULL);
590 return seq_insn;
593 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
594 be in the order in which the insns are to be executed. */
596 static rtx_insn_list *
597 add_to_delay_list (rtx_insn *insn, rtx_insn_list *delay_list)
599 /* If we have an empty list, just make a new list element. If
600 INSN has its block number recorded, clear it since we may
601 be moving the insn to a new block. */
603 if (delay_list == 0)
605 clear_hashed_info_for_insn (insn);
606 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
609 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
610 list. */
611 XEXP (delay_list, 1) = add_to_delay_list (insn, delay_list->next ());
613 return delay_list;
616 /* Delete INSN from the delay slot of the insn that it is in, which may
617 produce an insn with no delay slots. Return the new insn. */
619 static rtx_insn *
620 delete_from_delay_slot (rtx_insn *insn)
622 rtx_insn *trial, *seq_insn, *prev;
623 rtx_sequence *seq;
624 rtx_insn_list *delay_list = 0;
625 int i;
626 int had_barrier = 0;
628 /* We first must find the insn containing the SEQUENCE with INSN in its
629 delay slot. Do this by finding an insn, TRIAL, where
630 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
632 for (trial = insn;
633 PREV_INSN (NEXT_INSN (trial)) == trial;
634 trial = NEXT_INSN (trial))
637 seq_insn = PREV_INSN (NEXT_INSN (trial));
638 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
640 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
641 had_barrier = 1;
643 /* Create a delay list consisting of all the insns other than the one
644 we are deleting (unless we were the only one). */
645 if (seq->len () > 2)
646 for (i = 1; i < seq->len (); i++)
647 if (seq->insn (i) != insn)
648 delay_list = add_to_delay_list (seq->insn (i), delay_list);
650 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
651 list, and rebuild the delay list if non-empty. */
652 prev = PREV_INSN (seq_insn);
653 trial = seq->insn (0);
654 delete_related_insns (seq_insn);
655 add_insn_after (trial, prev, NULL);
 657 /* If there was a barrier after the old SEQUENCE, re-emit it. */
658 if (had_barrier)
659 emit_barrier_after (trial);
 661 /* If there are any delay insns, re-emit them. Otherwise clear the
662 annul flag. */
663 if (delay_list)
664 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
665 else if (JUMP_P (trial))
666 INSN_ANNULLED_BRANCH_P (trial) = 0;
668 INSN_FROM_TARGET_P (insn) = 0;
670 /* Show we need to fill this insn again. */
671 obstack_ptr_grow (&unfilled_slots_obstack, trial);
673 return trial;
676 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
677 the insn that sets CC0 for it and delete it too. */
679 static void
680 delete_scheduled_jump (rtx_insn *insn)
682 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
683 delete the insn that sets the condition code, but it is hard to find it.
684 Since this case is rare anyway, don't bother trying; there would likely
685 be other insns that became dead anyway, which we wouldn't know to
686 delete. */
688 #ifdef HAVE_cc0
689 if (reg_mentioned_p (cc0_rtx, insn))
691 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
693 /* If a reg-note was found, it points to an insn to set CC0. This
694 insn is in the delay list of some other insn. So delete it from
695 the delay list it was in. */
696 if (note)
698 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
699 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
700 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
702 else
704 /* The insn setting CC0 is our previous insn, but it may be in
705 a delay slot. It will be the last insn in the delay slot, if
706 it is. */
707 rtx_insn *trial = previous_insn (insn);
708 if (NOTE_P (trial))
709 trial = prev_nonnote_insn (trial);
710 if (sets_cc0_p (PATTERN (trial)) != 1
711 || FIND_REG_INC_NOTE (trial, NULL_RTX))
712 return;
713 if (PREV_INSN (NEXT_INSN (trial)) == trial)
714 delete_related_insns (trial);
715 else
716 delete_from_delay_slot (trial);
719 #endif
721 delete_related_insns (insn);
724 /* Counters for delay-slot filling. */
726 #define NUM_REORG_FUNCTIONS 2
727 #define MAX_DELAY_HISTOGRAM 3
728 #define MAX_REORG_PASSES 2
730 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
732 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
734 static int reorg_pass_number;
736 static void
737 note_delay_statistics (int slots_filled, int index)
739 num_insns_needing_delays[index][reorg_pass_number]++;
740 if (slots_filled > MAX_DELAY_HISTOGRAM)
741 slots_filled = MAX_DELAY_HISTOGRAM;
742 num_filled_delays[index][slots_filled][reorg_pass_number]++;
745 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
747 /* Optimize the following cases:
749 1. When a conditional branch skips over only one instruction,
750 use an annulling branch and put that insn in the delay slot.
 751 Use either a branch that annuls when the condition is true or
752 invert the test with a branch that annuls when the condition is
753 false. This saves insns, since otherwise we must copy an insn
754 from the L1 target.
756 (orig) (skip) (otherwise)
757 Bcc.n L1 Bcc',a L1 Bcc,a L1'
758 insn insn insn2
759 L1: L1: L1:
760 insn2 insn2 insn2
761 insn3 insn3 L1':
762 insn3
764 2. When a conditional branch skips over only one instruction,
765 and after that, it unconditionally branches somewhere else,
 766 perform a similar optimization. This saves executing the
767 second branch in the case where the inverted condition is true.
769 Bcc.n L1 Bcc',a L2
770 insn insn
771 L1: L1:
772 Bra L2 Bra L2
774 INSN is a JUMP_INSN.
776 This should be expanded to skip over N insns, where N is the number
777 of delay slots required. */
779 static rtx_insn_list *
780 optimize_skip (rtx_insn *insn)
782 rtx_insn *trial = next_nonnote_insn (insn);
783 rtx_insn *next_trial = next_active_insn (trial);
784 rtx_insn_list *delay_list = 0;
785 int flags;
787 flags = get_jump_flags (insn, JUMP_LABEL (insn));
789 if (trial == 0
790 || !NONJUMP_INSN_P (trial)
791 || GET_CODE (PATTERN (trial)) == SEQUENCE
792 || recog_memoized (trial) < 0
793 || (! eligible_for_annul_false (insn, 0, trial, flags)
794 && ! eligible_for_annul_true (insn, 0, trial, flags))
795 || can_throw_internal (trial))
796 return 0;
798 /* There are two cases where we are just executing one insn (we assume
799 here that a branch requires only one insn; this should be generalized
800 at some point): Where the branch goes around a single insn or where
801 we have one insn followed by a branch to the same label we branch to.
802 In both of these cases, inverting the jump and annulling the delay
803 slot give the same effect in fewer insns. */
804 if (next_trial == next_active_insn (JUMP_LABEL (insn))
805 || (next_trial != 0
806 && simplejump_or_return_p (next_trial)
807 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
809 if (eligible_for_annul_false (insn, 0, trial, flags))
811 if (invert_jump (insn, JUMP_LABEL (insn), 1))
812 INSN_FROM_TARGET_P (trial) = 1;
813 else if (! eligible_for_annul_true (insn, 0, trial, flags))
814 return 0;
817 delay_list = add_to_delay_list (trial, NULL);
818 next_trial = next_active_insn (trial);
819 update_block (trial, trial);
820 delete_related_insns (trial);
822 /* Also, if we are targeting an unconditional
823 branch, thread our jump to the target of that branch. Don't
824 change this into a RETURN here, because it may not accept what
825 we have in the delay slot. We'll fix this up later. */
826 if (next_trial && simplejump_or_return_p (next_trial))
828 rtx target_label = JUMP_LABEL (next_trial);
829 if (ANY_RETURN_P (target_label))
830 target_label = find_end_label (target_label);
832 if (target_label)
834 /* Recompute the flags based on TARGET_LABEL since threading
835 the jump to TARGET_LABEL may change the direction of the
836 jump (which may change the circumstances in which the
837 delay slot is nullified). */
838 flags = get_jump_flags (insn, target_label);
839 if (eligible_for_annul_true (insn, 0, trial, flags))
840 reorg_redirect_jump (insn, target_label);
844 INSN_ANNULLED_BRANCH_P (insn) = 1;
847 return delay_list;
849 #endif
851 /* Encode and return branch direction and prediction information for
852 INSN assuming it will jump to LABEL.
 854 Non-conditional branches return no direction information and
855 are predicted as very likely taken. */
857 static int
858 get_jump_flags (const rtx_insn *insn, rtx label)
860 int flags;
 862 /* get_jump_flags can be passed any insn with delay slots; these may
863 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
864 direction information, and only if they are conditional jumps.
866 If LABEL is a return, then there is no way to determine the branch
867 direction. */
868 if (JUMP_P (insn)
869 && (condjump_p (insn) || condjump_in_parallel_p (insn))
870 && !ANY_RETURN_P (label)
871 && INSN_UID (insn) <= max_uid
872 && INSN_UID (label) <= max_uid)
873 flags
874 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
875 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
876 /* No valid direction information. */
877 else
878 flags = 0;
880 return flags;
883 /* Return truth value of the statement that this branch
884 is mostly taken. If we think that the branch is extremely likely
885 to be taken, we return 2. If the branch is slightly more likely to be
886 taken, return 1. If the branch is slightly less likely to be taken,
887 return 0 and if the branch is highly unlikely to be taken, return -1. */
889 static int
890 mostly_true_jump (rtx jump_insn)
892 /* If branch probabilities are available, then use that number since it
893 always gives a correct answer. */
894 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
895 if (note)
897 int prob = XINT (note, 0);
899 if (prob >= REG_BR_PROB_BASE * 9 / 10)
900 return 2;
901 else if (prob >= REG_BR_PROB_BASE / 2)
902 return 1;
903 else if (prob >= REG_BR_PROB_BASE / 10)
904 return 0;
905 else
906 return -1;
909 /* If there is no note, assume branches are not taken.
910 This should be rare. */
911 return 0;
914 /* Return the condition under which INSN will branch to TARGET. If TARGET
915 is zero, return the condition under which INSN will return. If INSN is
916 an unconditional branch, return const_true_rtx. If INSN isn't a simple
917 type of jump, or it doesn't go to TARGET, return 0. */
919 static rtx
920 get_branch_condition (const rtx_insn *insn, rtx target)
922 rtx pat = PATTERN (insn);
923 rtx src;
925 if (condjump_in_parallel_p (insn))
926 pat = XVECEXP (pat, 0, 0);
928 if (ANY_RETURN_P (pat) && pat == target)
929 return const_true_rtx;
931 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
932 return 0;
934 src = SET_SRC (pat);
935 if (GET_CODE (src) == LABEL_REF && LABEL_REF_LABEL (src) == target)
936 return const_true_rtx;
938 else if (GET_CODE (src) == IF_THEN_ELSE
939 && XEXP (src, 2) == pc_rtx
940 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
941 && LABEL_REF_LABEL (XEXP (src, 1)) == target)
942 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
943 return XEXP (src, 0);
945 else if (GET_CODE (src) == IF_THEN_ELSE
946 && XEXP (src, 1) == pc_rtx
947 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
948 && LABEL_REF_LABEL (XEXP (src, 2)) == target)
949 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
951 enum rtx_code rev;
952 rev = reversed_comparison_code (XEXP (src, 0), insn);
953 if (rev != UNKNOWN)
954 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
955 XEXP (XEXP (src, 0), 0),
956 XEXP (XEXP (src, 0), 1));
959 return 0;
962 /* Return nonzero if CONDITION is more strict than the condition of
963 INSN, i.e., if INSN will always branch if CONDITION is true. */
965 static int
966 condition_dominates_p (rtx condition, const rtx_insn *insn)
968 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
969 enum rtx_code code = GET_CODE (condition);
970 enum rtx_code other_code;
972 if (rtx_equal_p (condition, other_condition)
973 || other_condition == const_true_rtx)
974 return 1;
976 else if (condition == const_true_rtx || other_condition == 0)
977 return 0;
979 other_code = GET_CODE (other_condition);
980 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
981 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
982 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
983 return 0;
985 return comparison_dominates_p (code, other_code);
988 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
989 any insns already in the delay slot of JUMP. */
991 static int
992 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
994 int flags, i;
995 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
997 /* Make sure all the delay slots of this jump would still
998 be valid after threading the jump. If they are still
999 valid, then return nonzero. */
1001 flags = get_jump_flags (jump, newlabel);
1002 for (i = 1; i < pat->len (); i++)
1003 if (! (
1004 #ifdef ANNUL_IFFALSE_SLOTS
1005 (INSN_ANNULLED_BRANCH_P (jump)
1006 && INSN_FROM_TARGET_P (pat->insn (i)))
1007 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
1008 #endif
1009 #ifdef ANNUL_IFTRUE_SLOTS
1010 (INSN_ANNULLED_BRANCH_P (jump)
1011 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1012 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
1013 #endif
1014 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
1015 break;
1017 return (i == pat->len ());
1020 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
1021 any insns we wish to place in the delay slot of JUMP. */
1023 static int
1024 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
1025 rtx_insn_list *delay_list)
1027 int flags, i;
1028 rtx_insn_list *li;
1030 /* Make sure all the insns in DELAY_LIST would still be
1031 valid after threading the jump. If they are still
1032 valid, then return nonzero. */
1034 flags = get_jump_flags (jump, newlabel);
1035 for (li = delay_list, i = 0; li; li = li->next (), i++)
1036 if (! (
1037 #ifdef ANNUL_IFFALSE_SLOTS
1038 (INSN_ANNULLED_BRANCH_P (jump)
1039 && INSN_FROM_TARGET_P (li->insn ()))
1040 ? eligible_for_annul_false (jump, i, li->insn (), flags) :
1041 #endif
1042 #ifdef ANNUL_IFTRUE_SLOTS
1043 (INSN_ANNULLED_BRANCH_P (jump)
1044 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
1045 ? eligible_for_annul_true (jump, i, li->insn (), flags) :
1046 #endif
1047 eligible_for_delay (jump, i, li->insn (), flags)))
1048 break;
1050 return (li == NULL);
1053 /* DELAY_LIST is a list of insns that have already been placed into delay
1054 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1055 If not, return 0; otherwise return 1. */
1057 static int
1058 check_annul_list_true_false (int annul_true_p, rtx delay_list)
1060 rtx temp;
1062 if (delay_list)
1064 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1066 rtx trial = XEXP (temp, 0);
1068 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1069 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1070 return 0;
1074 return 1;
1077 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1078 the condition tested by INSN is CONDITION and the resources shown in
1079 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1080 from SEQ's delay list, in addition to whatever insns it may execute
 1081 (in DELAY_LIST). SETS and NEEDED denote resources already set and
1082 needed while searching for delay slot insns. Return the concatenated
1083 delay list if possible, otherwise, return 0.
1085 SLOTS_TO_FILL is the total number of slots required by INSN, and
1086 PSLOTS_FILLED points to the number filled so far (also the number of
1087 insns in DELAY_LIST). It is updated with the number that have been
1088 filled from the SEQUENCE, if any.
1090 PANNUL_P points to a nonzero value if we already know that we need
1091 to annul INSN. If this routine determines that annulling is needed,
1092 it may set that value nonzero.
1094 PNEW_THREAD points to a location that is to receive the place at which
1095 execution should continue. */
1097 static rtx_insn_list *
1098 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1099 rtx_insn_list *delay_list, struct resources *sets,
1100 struct resources *needed,
1101 struct resources *other_needed,
1102 int slots_to_fill, int *pslots_filled,
1103 int *pannul_p, rtx *pnew_thread)
1105 int slots_remaining = slots_to_fill - *pslots_filled;
1106 int total_slots_filled = *pslots_filled;
1107 rtx_insn_list *new_delay_list = 0;
1108 int must_annul = *pannul_p;
1109 int used_annul = 0;
1110 int i;
1111 struct resources cc_set;
1112 bool *redundant;
1114 /* We can't do anything if there are more delay slots in SEQ than we
1115 can handle, or if we don't know that it will be a taken branch.
1116 We know that it will be a taken branch if it is either an unconditional
1117 branch or a conditional branch with a stricter branch condition.
1119 Also, exit if the branch has more than one set, since then it is computing
1120 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1121 ??? It may be possible to move other sets into INSN in addition to
1122 moving the instructions in the delay slots.
 1124 We cannot steal the delay list if one of the instructions in the
 1125 current delay_list modifies the condition codes and the jump in the
 1126 sequence is a conditional jump. We cannot do this because we cannot
 1127 change the direction of the jump; the condition codes will
 1128 affect the direction of the jump in the sequence. */
1130 CLEAR_RESOURCE (&cc_set);
1131 for (rtx_insn_list *temp = delay_list; temp; temp = temp->next ())
1133 rtx_insn *trial = temp->insn ();
1135 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1136 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1137 return delay_list;
1140 if (XVECLEN (seq, 0) - 1 > slots_remaining
1141 || ! condition_dominates_p (condition, seq->insn (0))
1142 || ! single_set (seq->insn (0)))
1143 return delay_list;
1145 #ifdef MD_CAN_REDIRECT_BRANCH
1146 /* On some targets, branches with delay slots can have a limited
1147 displacement. Give the back end a chance to tell us we can't do
1148 this. */
1149 if (! MD_CAN_REDIRECT_BRANCH (insn, seq->insn (0)))
1150 return delay_list;
1151 #endif
1153 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1154 for (i = 1; i < seq->len (); i++)
1156 rtx_insn *trial = seq->insn (i);
1157 int flags;
1159 if (insn_references_resource_p (trial, sets, false)
1160 || insn_sets_resource_p (trial, needed, false)
1161 || insn_sets_resource_p (trial, sets, false)
1162 #ifdef HAVE_cc0
1163 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1164 delay list. */
1165 || find_reg_note (trial, REG_CC_USER, NULL_RTX)
1166 #endif
1167 /* If TRIAL is from the fallthrough code of an annulled branch insn
1168 in SEQ, we cannot use it. */
1169 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1170 && ! INSN_FROM_TARGET_P (trial)))
1171 return delay_list;
1173 /* If this insn was already done (usually in a previous delay slot),
1174 pretend we put it in our delay slot. */
1175 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1176 if (redundant[i])
1177 continue;
1179 /* We will end up re-vectoring this branch, so compute flags
1180 based on jumping to the new label. */
1181 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1183 if (! must_annul
1184 && ((condition == const_true_rtx
1185 || (! insn_sets_resource_p (trial, other_needed, false)
1186 && ! may_trap_or_fault_p (PATTERN (trial)))))
1187 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1188 : (must_annul || (delay_list == NULL && new_delay_list == NULL))
1189 && (must_annul = 1,
1190 check_annul_list_true_false (0, delay_list)
1191 && check_annul_list_true_false (0, new_delay_list)
1192 && eligible_for_annul_false (insn, total_slots_filled,
1193 trial, flags)))
1195 if (must_annul)
1196 used_annul = 1;
1197 rtx_insn *temp = copy_delay_slot_insn (trial);
1198 INSN_FROM_TARGET_P (temp) = 1;
1199 new_delay_list = add_to_delay_list (temp, new_delay_list);
1200 total_slots_filled++;
1202 if (--slots_remaining == 0)
1203 break;
1205 else
1206 return delay_list;
1209 /* Record the effect of the instructions that were redundant and which
1210 we therefore decided not to copy. */
1211 for (i = 1; i < seq->len (); i++)
1212 if (redundant[i])
1213 update_block (seq->insn (i), insn);
1215 /* Show the place to which we will be branching. */
1216 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1218 /* Add any new insns to the delay list and update the count of the
1219 number of slots filled. */
1220 *pslots_filled = total_slots_filled;
1221 if (used_annul)
1222 *pannul_p = 1;
1224 if (delay_list == 0)
1225 return new_delay_list;
1227 for (rtx_insn_list *temp = new_delay_list; temp; temp = temp->next ())
1228 delay_list = add_to_delay_list (temp->insn (), delay_list);
1230 return delay_list;
1233 /* Similar to steal_delay_list_from_target except that SEQ is on the
1234 fallthrough path of INSN. Here we only do something if the delay insn
1235 of SEQ is an unconditional branch. In that case we steal its delay slot
1236 for INSN since unconditional branches are much easier to fill. */
1238 static rtx_insn_list *
1239 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1240 rtx_sequence *seq,
1241 rtx_insn_list *delay_list,
1242 struct resources *sets,
1243 struct resources *needed,
1244 struct resources *other_needed,
1245 int slots_to_fill, int *pslots_filled,
1246 int *pannul_p)
1248 int i;
1249 int flags;
1250 int must_annul = *pannul_p;
1251 int used_annul = 0;
1253 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1255 /* We can't do anything if SEQ's delay insn isn't an
1256 unconditional branch. */
1258 if (! simplejump_or_return_p (seq->insn (0)))
1259 return delay_list;
1261 for (i = 1; i < seq->len (); i++)
1263 rtx_insn *trial = seq->insn (i);
1265 /* If TRIAL sets CC0, stealing it will move it too far from the use
1266 of CC0. */
1267 if (insn_references_resource_p (trial, sets, false)
1268 || insn_sets_resource_p (trial, needed, false)
1269 || insn_sets_resource_p (trial, sets, false)
1270 #ifdef HAVE_cc0
1271 || sets_cc0_p (PATTERN (trial))
1272 #endif
1275 break;
1277 /* If this insn was already done, we don't need it. */
1278 if (redundant_insn (trial, insn, delay_list))
1280 update_block (trial, insn);
1281 delete_from_delay_slot (trial);
1282 continue;
1285 if (! must_annul
1286 && ((condition == const_true_rtx
1287 || (! insn_sets_resource_p (trial, other_needed, false)
1288 && ! may_trap_or_fault_p (PATTERN (trial)))))
1289 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1290 : (must_annul || delay_list == NULL) && (must_annul = 1,
1291 check_annul_list_true_false (1, delay_list)
1292 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1294 if (must_annul)
1295 used_annul = 1;
1296 delete_from_delay_slot (trial);
1297 delay_list = add_to_delay_list (trial, delay_list);
1299 if (++(*pslots_filled) == slots_to_fill)
1300 break;
1302 else
1303 break;
1306 if (used_annul)
1307 *pannul_p = 1;
1308 return delay_list;
1311 /* Try merging insns starting at THREAD which match exactly the insns in
1312 INSN's delay list.
1314 If all insns were matched and the insn was previously annulling, the
1315 annul bit will be cleared.
1317 For each insn that is merged, if the branch is or will be non-annulling,
1318 we delete the merged insn. */
1320 static void
1321 try_merge_delay_insns (rtx insn, rtx_insn *thread)
1323 rtx_insn *trial, *next_trial;
1324 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1325 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1326 int slot_number = 1;
1327 int num_slots = XVECLEN (PATTERN (insn), 0);
1328 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1329 struct resources set, needed;
1330 rtx_insn_list *merged_insns = 0;
1331 int i;
1332 int flags;
1334 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1336 CLEAR_RESOURCE (&needed);
1337 CLEAR_RESOURCE (&set);
1339 /* If this is not an annulling branch, take into account anything needed in
1340 INSN's delay slot. This prevents two increments from being incorrectly
1341 folded into one. If we are annulling, this would be the correct
1342 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1343 will essentially disable this optimization. This method is somewhat of
1344 a kludge, but I don't see a better way.) */
1345 if (! annul_p)
1346 for (i = 1 ; i < num_slots; i++)
1347 if (XVECEXP (PATTERN (insn), 0, i))
1348 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1349 true);
1351 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1353 rtx pat = PATTERN (trial);
1354 rtx oldtrial = trial;
1356 next_trial = next_nonnote_insn (trial);
1358 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1359 if (NONJUMP_INSN_P (trial)
1360 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1361 continue;
1363 if (GET_CODE (next_to_match) == GET_CODE (trial)
1364 #ifdef HAVE_cc0
1365 /* We can't share an insn that sets cc0. */
1366 && ! sets_cc0_p (pat)
1367 #endif
1368 && ! insn_references_resource_p (trial, &set, true)
1369 && ! insn_sets_resource_p (trial, &set, true)
1370 && ! insn_sets_resource_p (trial, &needed, true)
1371 && (trial = try_split (pat, trial, 0)) != 0
1372 /* Update next_trial, in case try_split succeeded. */
1373 && (next_trial = next_nonnote_insn (trial))
1374 /* Likewise THREAD. */
1375 && (thread = oldtrial == thread ? trial : thread)
1376 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1377 /* Have to test this condition if annul condition is different
1378 from (and less restrictive than) non-annulling one. */
1379 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1382 if (! annul_p)
1384 update_block (trial, thread);
1385 if (trial == thread)
1386 thread = next_active_insn (thread);
1388 delete_related_insns (trial);
1389 INSN_FROM_TARGET_P (next_to_match) = 0;
1391 else
1392 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1394 if (++slot_number == num_slots)
1395 break;
1397 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1400 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1401 mark_referenced_resources (trial, &needed, true);
1404 /* See if we stopped on a filled insn. If we did, try to see if its
1405 delay slots match. */
1406 if (slot_number != num_slots
1407 && trial && NONJUMP_INSN_P (trial)
1408 && GET_CODE (PATTERN (trial)) == SEQUENCE
1409 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1410 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1412 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1413 rtx filled_insn = XVECEXP (pat, 0, 0);
1415 /* Account for resources set/needed by the filled insn. */
1416 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1417 mark_referenced_resources (filled_insn, &needed, true);
1419 for (i = 1; i < pat->len (); i++)
1421 rtx_insn *dtrial = pat->insn (i);
1423 if (! insn_references_resource_p (dtrial, &set, true)
1424 && ! insn_sets_resource_p (dtrial, &set, true)
1425 && ! insn_sets_resource_p (dtrial, &needed, true)
1426 #ifdef HAVE_cc0
1427 && ! sets_cc0_p (PATTERN (dtrial))
1428 #endif
1429 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1430 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1432 if (! annul_p)
1434 rtx_insn *new_rtx;
1436 update_block (dtrial, thread);
1437 new_rtx = delete_from_delay_slot (dtrial);
1438 if (thread->deleted ())
1439 thread = new_rtx;
1440 INSN_FROM_TARGET_P (next_to_match) = 0;
1442 else
1443 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1444 merged_insns);
1446 if (++slot_number == num_slots)
1447 break;
1449 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1451 else
1453 /* Keep track of the set/referenced resources for the delay
1454 slots of any trial insns we encounter. */
1455 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1456 mark_referenced_resources (dtrial, &needed, true);
1461 /* If all insns in the delay slot have been matched and we were previously
1462 annulling the branch, we need not any more. In that case delete all the
1463 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1464 the delay list so that we know that it isn't only being used at the
1465 target. */
1466 if (slot_number == num_slots && annul_p)
1468 for (; merged_insns; merged_insns = merged_insns->next ())
1470 if (GET_MODE (merged_insns) == SImode)
1472 rtx_insn *new_rtx;
1474 update_block (merged_insns->insn (), thread);
1475 new_rtx = delete_from_delay_slot (merged_insns->insn ());
1476 if (thread->deleted ())
1477 thread = new_rtx;
1479 else
1481 update_block (merged_insns->insn (), thread);
1482 delete_related_insns (merged_insns->insn ());
1486 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1488 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1489 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1493 /* See if INSN is redundant with an insn in front of TARGET. Often this
1494 is called when INSN is a candidate for a delay slot of TARGET.
1495 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1496 of INSN. Often INSN will be redundant with an insn in a delay slot of
1497 some previous insn. This happens when we have a series of branches to the
1498 same label; in that case the first insn at the target might want to go
1499 into each of the delay slots.
1501 If we are not careful, this routine can take up a significant fraction
1502 of the total compilation time (4%), but only wins rarely. Hence we
1503 speed this routine up by making two passes. The first pass goes back
1504 until it hits a label and sees if it finds an insn with an identical
1505 pattern. Only in this (relatively rare) event does it check for
1506 data conflicts.
1508 We do not split insns we encounter. This could cause us not to find a
1509 redundant insn, but the cost of splitting seems greater than the possible
1510 gain in rare cases. */
1512 static rtx
1513 redundant_insn (rtx insn, rtx_insn *target, rtx delay_list)
1515 rtx target_main = target;
1516 rtx ipat = PATTERN (insn);
1517 rtx_insn *trial;
1518 rtx pat;
1519 struct resources needed, set;
1520 int i;
1521 unsigned insns_to_search;
1523 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1524 are allowed to not actually assign to such a register. */
1525 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1526 return 0;
1528 /* Scan backwards looking for a match. */
1529 for (trial = PREV_INSN (target),
1530 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1531 trial && insns_to_search > 0;
1532 trial = PREV_INSN (trial))
1534 /* (use (insn))s can come immediately after a barrier if the
1535 label that used to precede them has been deleted as dead.
1536 See delete_related_insns. */
1537 if (LABEL_P (trial) || BARRIER_P (trial))
1538 return 0;
1540 if (!INSN_P (trial))
1541 continue;
1542 --insns_to_search;
1544 pat = PATTERN (trial);
1545 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1546 continue;
1548 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1550 /* Stop for a CALL and its delay slots because it is difficult to
1551 track its resource needs correctly. */
1552 if (CALL_P (seq->element (0)))
1553 return 0;
1555 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1556 slots because it is difficult to track its resource needs
1557 correctly. */
1559 #ifdef INSN_SETS_ARE_DELAYED
1560 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1561 return 0;
1562 #endif
1564 #ifdef INSN_REFERENCES_ARE_DELAYED
1565 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1566 return 0;
1567 #endif
1569 /* See if any of the insns in the delay slot match, updating
1570 resource requirements as we go. */
1571 for (i = seq->len () - 1; i > 0; i--)
1572 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1573 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1574 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1575 break;
 1577 /* If we found a match, exit this loop early. */
1578 if (i > 0)
1579 break;
1582 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1583 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1584 break;
1587 /* If we didn't find an insn that matches, return 0. */
1588 if (trial == 0)
1589 return 0;
1591 /* See what resources this insn sets and needs. If they overlap, or
1592 if this insn references CC0, it can't be redundant. */
1594 CLEAR_RESOURCE (&needed);
1595 CLEAR_RESOURCE (&set);
1596 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1597 mark_referenced_resources (insn, &needed, true);
1599 /* If TARGET is a SEQUENCE, get the main insn. */
1600 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1601 target_main = XVECEXP (PATTERN (target), 0, 0);
1603 if (resource_conflicts_p (&needed, &set)
1604 #ifdef HAVE_cc0
1605 || reg_mentioned_p (cc0_rtx, ipat)
1606 #endif
1607 /* The insn requiring the delay may not set anything needed or set by
1608 INSN. */
1609 || insn_sets_resource_p (target_main, &needed, true)
1610 || insn_sets_resource_p (target_main, &set, true))
1611 return 0;
1613 /* Insns we pass may not set either NEEDED or SET, so merge them for
1614 simpler tests. */
1615 needed.memory |= set.memory;
1616 IOR_HARD_REG_SET (needed.regs, set.regs);
1618 /* This insn isn't redundant if it conflicts with an insn that either is
1619 or will be in a delay slot of TARGET. */
1621 while (delay_list)
1623 if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, true))
1624 return 0;
1625 delay_list = XEXP (delay_list, 1);
1628 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1629 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1630 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1631 true))
1632 return 0;
1634 /* Scan backwards until we reach a label or an insn that uses something
 1635 INSN sets or sets something INSN uses or sets. */
1637 for (trial = PREV_INSN (target),
1638 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1639 trial && !LABEL_P (trial) && insns_to_search > 0;
1640 trial = PREV_INSN (trial))
1642 if (!INSN_P (trial))
1643 continue;
1644 --insns_to_search;
1646 pat = PATTERN (trial);
1647 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1648 continue;
1650 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1652 bool annul_p = false;
1653 rtx_insn *control = seq->insn (0);
1655 /* If this is a CALL_INSN and its delay slots, it is hard to track
1656 the resource needs properly, so give up. */
1657 if (CALL_P (control))
1658 return 0;
1660 /* If this is an INSN or JUMP_INSN with delayed effects, it
1661 is hard to track the resource needs properly, so give up. */
1663 #ifdef INSN_SETS_ARE_DELAYED
1664 if (INSN_SETS_ARE_DELAYED (control))
1665 return 0;
1666 #endif
1668 #ifdef INSN_REFERENCES_ARE_DELAYED
1669 if (INSN_REFERENCES_ARE_DELAYED (control))
1670 return 0;
1671 #endif
1673 if (JUMP_P (control))
1674 annul_p = INSN_ANNULLED_BRANCH_P (control);
1676 /* See if any of the insns in the delay slot match, updating
1677 resource requirements as we go. */
1678 for (i = seq->len () - 1; i > 0; i--)
1680 rtx candidate = seq->element (i);
1682 /* If an insn will be annulled if the branch is false, it isn't
1683 considered as a possible duplicate insn. */
1684 if (rtx_equal_p (PATTERN (candidate), ipat)
1685 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1687 /* Show that this insn will be used in the sequel. */
1688 INSN_FROM_TARGET_P (candidate) = 0;
1689 return candidate;
1692 /* Unless this is an annulled insn from the target of a branch,
1693 we must stop if it sets anything needed or set by INSN. */
1694 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1695 && insn_sets_resource_p (candidate, &needed, true))
1696 return 0;
1699 /* If the insn requiring the delay slot conflicts with INSN, we
1700 must stop. */
1701 if (insn_sets_resource_p (control, &needed, true))
1702 return 0;
1704 else
1706 /* See if TRIAL is the same as INSN. */
1707 pat = PATTERN (trial);
1708 if (rtx_equal_p (pat, ipat))
1709 return trial;
1711 /* Can't go any further if TRIAL conflicts with INSN. */
1712 if (insn_sets_resource_p (trial, &needed, true))
1713 return 0;
1717 return 0;
1720 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1721 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1722 is nonzero, we are allowed to fall into this thread; otherwise, we are
1723 not.
1725 If LABEL is used more than once or we pass a label other than LABEL before
1726 finding an active insn, we do not own this thread. */
1728 static int
1729 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1731 rtx_insn *active_insn;
1732 rtx_insn *insn;
1734 /* We don't own the function end. */
1735 if (thread == 0 || ANY_RETURN_P (thread))
1736 return 0;
1738 /* We have a non-NULL insn. */
1739 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1741 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1742 active_insn = next_active_insn (PREV_INSN (thread_insn));
1744 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1745 if (LABEL_P (insn)
1746 && (insn != label || LABEL_NUSES (insn) != 1))
1747 return 0;
1749 if (allow_fallthrough)
1750 return 1;
1752 /* Ensure that we reach a BARRIER before any insn or label. */
1753 for (insn = prev_nonnote_insn (thread_insn);
1754 insn == 0 || !BARRIER_P (insn);
1755 insn = prev_nonnote_insn (insn))
1756 if (insn == 0
1757 || LABEL_P (insn)
1758 || (NONJUMP_INSN_P (insn)
1759 && GET_CODE (PATTERN (insn)) != USE
1760 && GET_CODE (PATTERN (insn)) != CLOBBER))
1761 return 0;
1763 return 1;
1766 /* Called when INSN is being moved from a location near the target of a jump.
1767 We leave a marker of the form (use (INSN)) immediately in front
1768 of WHERE for mark_target_live_regs. These markers will be deleted when
1769 reorg finishes.
1771 We used to try to update the live status of registers if WHERE is at
1772 the start of a basic block, but that can't work since we may remove a
1773 BARRIER in relax_delay_slots. */
1775 static void
1776 update_block (rtx_insn *insn, rtx where)
1778 /* Ignore if this was in a delay slot and it came from the target of
1779 a branch. */
1780 if (INSN_FROM_TARGET_P (insn))
1781 return;
1783 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1785 /* INSN might be making a value live in a block where it didn't use to
1786 be. So recompute liveness information for this block. */
1788 incr_ticks_for_insn (insn);
1791 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1792 the basic block containing the jump. */
1794 static int
1795 reorg_redirect_jump (rtx_insn *jump, rtx nlabel)
1797 incr_ticks_for_insn (jump);
1798 return redirect_jump (jump, nlabel, 1);
1801 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1802 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1803 that reference values used in INSN. If we find one, then we move the
1804 REG_DEAD note to INSN.
1806 This is needed to handle the case where a later insn (after INSN) has a
1807 REG_DEAD note for a register used by INSN, and this later insn subsequently
1808 gets moved before a CODE_LABEL because it is a redundant insn. In this
1809 case, mark_target_live_regs may be confused into thinking the register
1810 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1812 static void
1813 update_reg_dead_notes (rtx insn, rtx delayed_insn)
1815 rtx p, link, next;
1817 for (p = next_nonnote_insn (insn); p != delayed_insn;
1818 p = next_nonnote_insn (p))
1819 for (link = REG_NOTES (p); link; link = next)
1821 next = XEXP (link, 1);
1823 if (REG_NOTE_KIND (link) != REG_DEAD
1824 || !REG_P (XEXP (link, 0)))
1825 continue;
1827 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1829 /* Move the REG_DEAD note from P to INSN. */
1830 remove_note (p, link);
1831 XEXP (link, 1) = REG_NOTES (insn);
1832 REG_NOTES (insn) = link;
1837 /* Called when an insn redundant with start_insn is deleted. If there
1838 is a REG_DEAD note for the target of start_insn between start_insn
1839 and stop_insn, then the REG_DEAD note needs to be deleted since the
1840 value no longer dies there.
1842 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1843 confused into thinking the register is dead. */
1845 static void
1846 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1848 rtx p, link, next;
1850 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1851 p = next_nonnote_insn (p))
1852 for (link = REG_NOTES (p); link; link = next)
1854 next = XEXP (link, 1);
1856 if (REG_NOTE_KIND (link) != REG_DEAD
1857 || !REG_P (XEXP (link, 0)))
1858 continue;
1860 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1862 remove_note (p, link);
1863 return;
1868 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1870 This handles the case of udivmodXi4 instructions which optimize their
1871 output depending on whether any REG_UNUSED notes are present.
1872 We must make sure that INSN calculates as many results as REDUNDANT_INSN
1873 does. */
1875 static void
1876 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1878 rtx link, next;
1880 for (link = REG_NOTES (insn); link; link = next)
1882 next = XEXP (link, 1);
1884 if (REG_NOTE_KIND (link) != REG_UNUSED
1885 || !REG_P (XEXP (link, 0)))
1886 continue;
1888 if (! find_regno_note (redundant_insn, REG_UNUSED,
1889 REGNO (XEXP (link, 0))))
1890 remove_note (insn, link);
1894 static vec <rtx> sibling_labels;
1896 /* Return the label before INSN, or put a new label there. If SIBLING is
1897 non-zero, it is another label associated with the new label (if any),
1898 typically the former target of the jump that will be redirected to
1899 the new label. */
1901 static rtx_insn *
1902 get_label_before (rtx_insn *insn, rtx sibling)
1904 rtx_insn *label;
1906 /* Find an existing label at this point
1907 or make a new one if there is none. */
1908 label = prev_nonnote_insn (insn);
1910 if (label == 0 || !LABEL_P (label))
1912 rtx_insn *prev = PREV_INSN (insn);
1914 label = gen_label_rtx ();
1915 emit_label_after (label, prev);
1916 LABEL_NUSES (label) = 0;
1917 if (sibling)
1919 sibling_labels.safe_push (label);
1920 sibling_labels.safe_push (sibling);
1923 return label;
1926 /* Scan a function looking for insns that need a delay slot and find insns to
1927 put into the delay slot.
1929 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1930 as calls). We do these first since we don't want jump insns (that are
1931 easier to fill) to get the only insns that could be used for non-jump insns.
1932 When it is zero, only try to fill JUMP_INSNs.
1934 When slots are filled in this manner, the insns (including the
1935 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1936 it is possible to tell whether a delay slot has really been filled
1937 or not. `final' knows how to deal with this, by communicating
1938 through FINAL_SEQUENCE. */
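/* As a rough illustrative sketch (abbreviated, not literal RTL), a branch
   whose single slot has been filled ends up wrapped like this:

	(insn (sequence [
	    (jump_insn ...)	; the insn that needed the delay slot
	    (insn ...)		; the insn chosen to fill the slot
	  ]))

   so that later code, and `final' in particular, can tell a genuinely
   filled slot from an unfilled one.  */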
1940 static void
1941 fill_simple_delay_slots (int non_jumps_p)
1943 rtx_insn *insn, *trial, *next_trial;
1944 rtx pat;
1945 int i;
1946 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1947 struct resources needed, set;
1948 int slots_to_fill, slots_filled;
1949 rtx_insn_list *delay_list;
1951 for (i = 0; i < num_unfilled_slots; i++)
1953 int flags;
1954 /* Get the next insn to fill. If it has already had any slots assigned,
1955 we can't do anything with it. Maybe we'll improve this later. */
1957 insn = unfilled_slots_base[i];
1958 if (insn == 0
1959 || insn->deleted ()
1960 || (NONJUMP_INSN_P (insn)
1961 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1962 || (JUMP_P (insn) && non_jumps_p)
1963 || (!JUMP_P (insn) && ! non_jumps_p))
1964 continue;
1966 /* It may have been that this insn used to need delay slots, but
1967 now doesn't; ignore in that case. This can happen, for example,
1968 on the HP PA RISC, where the number of delay slots depends on
1969 what insns are nearby. */
1970 slots_to_fill = num_delay_slots (insn);
1972 Some machine descriptions have defined instructions to have
1973 delay slots only in certain circumstances which may depend on
1974 nearby insns (which change due to reorg's actions).
1976 For example, the PA port normally has delay slots for unconditional
1977 jumps.
1979 However, the PA port claims such jumps do not have a delay slot
1980 if they are immediate successors of certain CALL_INSNs. This
1981 allows the port to favor filling the delay slot of the call with
1982 the unconditional jump. */
1983 if (slots_to_fill == 0)
1984 continue;
1986 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1987 says how many. After initialization, first try optimizing
1989 call _foo		-->	call _foo
1990 nop		-->	add %o7,.-L1,%o7
1991 b,a L1
1994 If this case applies, the delay slot of the call is filled with
1995 the unconditional jump. This is done first to avoid having the
1996 delay slot of the call filled in the backward scan. Also, since
1997 the unconditional jump is likely to also have a delay slot, that
1998 insn must exist when it is subsequently scanned.
2000 This is tried on each insn with delay slots as some machines
2001 have insns which perform calls, but are not represented as
2002 CALL_INSNs. */
2004 slots_filled = 0;
2005 delay_list = 0;
2007 if (JUMP_P (insn))
2008 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2009 else
2010 flags = get_jump_flags (insn, NULL_RTX);
2012 if ((trial = next_active_insn (insn))
2013 && JUMP_P (trial)
2014 && simplejump_p (trial)
2015 && eligible_for_delay (insn, slots_filled, trial, flags)
2016 && no_labels_between_p (insn, trial)
2017 && ! can_throw_internal (trial))
2019 rtx_insn **tmp;
2020 slots_filled++;
2021 delay_list = add_to_delay_list (trial, delay_list);
2023 /* TRIAL may have had its delay slot filled, then unfilled. When
2024 the delay slot is unfilled, TRIAL is placed back on the unfilled
2025 slots obstack. Unfortunately, it is placed on the end of the
2026 obstack, not in its original location. Therefore, we must search
2027 from entry i + 1 to the end of the unfilled slots obstack to
2028 try and find TRIAL. */
2029 tmp = &unfilled_slots_base[i + 1];
2030 while (*tmp != trial && tmp != unfilled_slots_next)
2031 tmp++;
2033 /* Remove the unconditional jump from consideration for delay slot
2034 filling and unthread it. */
2035 if (*tmp == trial)
2036 *tmp = 0;
2038 rtx_insn *next = NEXT_INSN (trial);
2039 rtx_insn *prev = PREV_INSN (trial);
2040 if (prev)
2041 SET_NEXT_INSN (prev) = next;
2042 if (next)
2043 SET_PREV_INSN (next) = prev;
2047 /* Now, scan backwards from the insn to search for a potential
2048 delay-slot candidate. Stop searching when a label or jump is hit.
2050 For each candidate, if it is to go into the delay slot (moved
2051 forward in execution sequence), it must not need or set any resources
2052 that were set by later insns and must not set any resources that
2053 are needed for those insns.
2055 The delay slot insn itself sets resources unless it is a call
2056 (in which case the called routine, not the insn itself, is doing
2057 the setting). */
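/* For instance (an illustrative sketch; the mnemonics and registers are
   made up and merely stand for insns with these dependences):

	add  r3,r1,r2			cmp  r4,0
	cmp  r4,0		-->	bne  L1
	bne  L1				 add  r3,r1,r2	; now in the slot
	 nop

   The add neither sets nor uses anything the compare or the branch
   depends on, so it can slide forward into the slot even though the
   compare sits between it and the branch.  */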
2059 if (slots_filled < slots_to_fill)
2061 CLEAR_RESOURCE (&needed);
2062 CLEAR_RESOURCE (&set);
2063 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2064 mark_referenced_resources (insn, &needed, false);
2066 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2067 trial = next_trial)
2069 next_trial = prev_nonnote_insn (trial);
2071 /* This must be an INSN or CALL_INSN. */
2072 pat = PATTERN (trial);
2074 /* Stand-alone USE and CLOBBER are just for flow. */
2075 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2076 continue;
2078 /* Check for resource conflict first, to avoid unnecessary
2079 splitting. */
2080 if (! insn_references_resource_p (trial, &set, true)
2081 && ! insn_sets_resource_p (trial, &set, true)
2082 && ! insn_sets_resource_p (trial, &needed, true)
2083 #ifdef HAVE_cc0
2084 /* Can't separate set of cc0 from its use. */
2085 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2086 #endif
2087 && ! can_throw_internal (trial))
2089 trial = try_split (pat, trial, 1);
2090 next_trial = prev_nonnote_insn (trial);
2091 if (eligible_for_delay (insn, slots_filled, trial, flags))
2093 /* In this case, we are searching backward, so if we
2094 find insns to put on the delay list, we want
2095 to put them at the head, rather than the
2096 tail, of the list. */
2098 update_reg_dead_notes (trial, insn);
2099 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2100 trial, delay_list);
2101 update_block (trial, trial);
2102 delete_related_insns (trial);
2103 if (slots_to_fill == ++slots_filled)
2104 break;
2105 continue;
2109 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2110 mark_referenced_resources (trial, &needed, true);
2114 /* If all needed slots haven't been filled, we come here. */
2116 /* Try to optimize the case of jumping around a single insn. */
2117 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2118 if (slots_filled != slots_to_fill
2119 && delay_list == 0
2120 && JUMP_P (insn)
2121 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2122 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2124 delay_list = optimize_skip (insn);
2125 if (delay_list)
2126 slots_filled += 1;
2128 #endif
2130 /* Try to get insns from beyond the insn needing the delay slot.
2131 These insns can neither set nor reference resources set in insns being
2132 skipped, cannot set resources in the insn being skipped, and, if this
2133 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2134 call might not return).
2136 There used to be code which continued past the target label if
2137 we saw all uses of the target label. This code did not work,
2138 because it failed to account for some instructions which were
2139 both annulled and marked as from the target. This can happen as a
2140 result of optimize_skip. Since this code was redundant with
2141 fill_eager_delay_slots anyway, it was just deleted. */
2143 if (slots_filled != slots_to_fill
2144 /* If this instruction could throw an exception which is
2145 caught in the same function, then it's not safe to fill
2146 the delay slot with an instruction from beyond this
2147 point. For example, consider:
2149 int i = 2;
2151 try {
2152 f();
2153 i = 3;
2154 } catch (...) {}
2156 return i;
2158 Even though `i' is a local variable, we must be sure not
2159 to put `i = 3' in the delay slot if `f' might throw an
2160 exception.
2162 Presumably, we should also check to see if we could get
2163 back to this function via `setjmp'. */
2164 && ! can_throw_internal (insn)
2165 && !JUMP_P (insn))
2167 int maybe_never = 0;
2168 rtx pat, trial_delay;
2170 CLEAR_RESOURCE (&needed);
2171 CLEAR_RESOURCE (&set);
2172 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2173 mark_referenced_resources (insn, &needed, true);
2175 if (CALL_P (insn))
2176 maybe_never = 1;
2178 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2179 trial = next_trial)
2181 next_trial = next_nonnote_insn (trial);
2183 /* This must be an INSN or CALL_INSN. */
2184 pat = PATTERN (trial);
2186 /* Stand-alone USE and CLOBBER are just for flow. */
2187 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2188 continue;
2190 /* If this already has filled delay slots, get the insn needing
2191 the delay slots. */
2192 if (GET_CODE (pat) == SEQUENCE)
2193 trial_delay = XVECEXP (pat, 0, 0);
2194 else
2195 trial_delay = trial;
2197 /* Stop our search when seeing a jump. */
2198 if (JUMP_P (trial_delay))
2199 break;
2201 /* See if we have a resource problem before we try to split. */
2202 if (GET_CODE (pat) != SEQUENCE
2203 && ! insn_references_resource_p (trial, &set, true)
2204 && ! insn_sets_resource_p (trial, &set, true)
2205 && ! insn_sets_resource_p (trial, &needed, true)
2206 #ifdef HAVE_cc0
2207 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2208 #endif
2209 && ! (maybe_never && may_trap_or_fault_p (pat))
2210 && (trial = try_split (pat, trial, 0))
2211 && eligible_for_delay (insn, slots_filled, trial, flags)
2212 && ! can_throw_internal (trial))
2214 next_trial = next_nonnote_insn (trial);
2215 delay_list = add_to_delay_list (trial, delay_list);
2216 #ifdef HAVE_cc0
2217 if (reg_mentioned_p (cc0_rtx, pat))
2218 link_cc0_insns (trial);
2219 #endif
2220 delete_related_insns (trial);
2221 if (slots_to_fill == ++slots_filled)
2222 break;
2223 continue;
2226 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2227 mark_referenced_resources (trial, &needed, true);
2229 /* Ensure we don't put insns between the setting of cc and the
2230 comparison by moving a setting of cc into an earlier delay
2231 slot since these insns could clobber the condition code. */
2232 set.cc = 1;
2234 /* If this is a call, we might not get here. */
2235 if (CALL_P (trial_delay))
2236 maybe_never = 1;
2239 /* If there are slots left to fill and our search was stopped by an
2240 unconditional branch, try the insn at the branch target. We can
2241 redirect the branch if it works.
2243 Don't do this if the insn at the branch target is a branch. */
2244 if (slots_to_fill != slots_filled
2245 && trial
2246 && jump_to_label_p (trial)
2247 && simplejump_p (trial)
2248 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2249 && ! (NONJUMP_INSN_P (next_trial)
2250 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2251 && !JUMP_P (next_trial)
2252 && ! insn_references_resource_p (next_trial, &set, true)
2253 && ! insn_sets_resource_p (next_trial, &set, true)
2254 && ! insn_sets_resource_p (next_trial, &needed, true)
2255 #ifdef HAVE_cc0
2256 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2257 #endif
2258 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2259 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2260 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2261 && ! can_throw_internal (trial))
2263 /* See comment in relax_delay_slots about necessity of using
2264 next_real_insn here. */
2265 rtx_insn *new_label = next_real_insn (next_trial);
2267 if (new_label != 0)
2268 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2269 else
2270 new_label = find_end_label (simple_return_rtx);
2272 if (new_label)
2274 delay_list
2275 = add_to_delay_list (copy_delay_slot_insn (next_trial),
2276 delay_list);
2277 slots_filled++;
2278 reorg_redirect_jump (trial, new_label);
2283 /* If this is an unconditional jump, then try to get insns from the
2284 target of the jump. */
2285 if (JUMP_P (insn)
2286 && simplejump_p (insn)
2287 && slots_filled != slots_to_fill)
2288 delay_list
2289 = fill_slots_from_thread (insn, const_true_rtx,
2290 next_active_insn (JUMP_LABEL (insn)),
2291 NULL, 1, 1,
2292 own_thread_p (JUMP_LABEL (insn),
2293 JUMP_LABEL (insn), 0),
2294 slots_to_fill, &slots_filled,
2295 delay_list);
2297 if (delay_list)
2298 unfilled_slots_base[i]
2299 = emit_delay_sequence (insn, delay_list, slots_filled);
2301 if (slots_to_fill == slots_filled)
2302 unfilled_slots_base[i] = 0;
2304 note_delay_statistics (slots_filled, 0);
2308 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2309 return the ultimate label reached by any such chain of jumps.
2310 Return a suitable return rtx if the chain ultimately leads to a
2311 return instruction.
2312 If LABEL is not followed by a jump, return LABEL.
2313 If the chain loops or we can't find the end, return LABEL,
2314 since that tells the caller to avoid changing the insn.
2315 If the returned label is obtained by following a crossing jump,
2316 set *CROSSING to true, otherwise set it to false. */
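/* A small illustration (labels made up): given

	jmp L1	...	L1: jmp L2	...	L2: <real code>

   following the chain from L1 yields L2, so the caller can retarget the
   original jump directly at L2 and skip the intermediate hop.  */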
2318 static rtx
2319 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2321 rtx_insn *insn;
2322 rtx_insn *next;
2323 int depth;
2325 *crossing = false;
2326 if (ANY_RETURN_P (label))
2327 return label;
2329 rtx_insn *value = as_a <rtx_insn *> (label);
2331 for (depth = 0;
2332 (depth < 10
2333 && (insn = next_active_insn (value)) != 0
2334 && JUMP_P (insn)
2335 && JUMP_LABEL (insn) != NULL_RTX
2336 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2337 || ANY_RETURN_P (PATTERN (insn)))
2338 && (next = NEXT_INSN (insn))
2339 && BARRIER_P (next));
2340 depth++)
2342 rtx this_label_or_return = JUMP_LABEL (insn);
2344 /* If we have found a cycle, make the insn jump to itself. */
2345 if (this_label_or_return == label)
2346 return label;
2348 /* Cannot follow returns and cannot look through tablejumps. */
2349 if (ANY_RETURN_P (this_label_or_return))
2350 return this_label_or_return;
2352 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2353 if (NEXT_INSN (this_label)
2354 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2355 break;
2357 if (!targetm.can_follow_jump (jump, insn))
2358 break;
2359 if (!*crossing)
2360 *crossing = CROSSING_JUMP_P (jump);
2361 value = this_label;
2363 if (depth == 10)
2364 return label;
2365 return value;
2368 /* Try to find insns to place in delay slots.
2370 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2371 or is an unconditional branch if CONDITION is const_true_rtx.
2372 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2374 THREAD is a flow of control: either the insns to be executed if the
2375 branch is true or those executed if it is false; THREAD_IF_TRUE says which.
2377 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2378 to see if any potential delay slot insns set things needed there.
2380 LIKELY is nonzero if it is extremely likely that the branch will be
2381 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2382 end of a loop back up to the top.
2384 OWN_THREAD is true if we are the only user of the thread, i.e., it is
2385 the fallthrough code of our jump or the target of the jump when we are
2386 the only jump going there.
2388 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2389 case, we can only take insns from the head of the thread for our delay
2390 slot. We then adjust the jump to point after the insns we have taken. */
2392 static rtx_insn_list *
2393 fill_slots_from_thread (rtx_insn *insn, rtx condition, rtx thread_or_return,
2394 rtx opposite_thread, int likely,
2395 int thread_if_true,
2396 int own_thread, int slots_to_fill,
2397 int *pslots_filled, rtx_insn_list *delay_list)
2399 rtx new_thread;
2400 struct resources opposite_needed, set, needed;
2401 rtx_insn *trial;
2402 int lose = 0;
2403 int must_annul = 0;
2404 int flags;
2406 /* Validate our arguments. */
2407 gcc_assert (condition != const_true_rtx || thread_if_true);
2408 gcc_assert (own_thread || thread_if_true);
2410 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2412 /* If our thread is the end of subroutine, we can't get any delay
2413 insns from that. */
2414 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2415 return delay_list;
2417 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2419 /* If this is an unconditional branch, nothing is needed at the
2420 opposite thread. Otherwise, compute what is needed there. */
2421 if (condition == const_true_rtx)
2422 CLEAR_RESOURCE (&opposite_needed);
2423 else
2424 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2426 /* If the insn at THREAD can be split, do it here to avoid having to
2427 update THREAD and NEW_THREAD if it is done in the loop below. Also
2428 initialize NEW_THREAD. */
2430 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2432 /* Scan insns at THREAD. We are looking for an insn that can be removed
2433 from THREAD (it neither sets nor references resources that were set
2434 ahead of it and it doesn't set anything needed by the insns ahead of
2435 it) and that either can be placed in an annulling insn or isn't
2436 needed at OPPOSITE_THREAD. */
2438 CLEAR_RESOURCE (&needed);
2439 CLEAR_RESOURCE (&set);
2441 /* If we do not own this thread, we must stop as soon as we find
2442 something that we can't put in a delay slot, since all we can do
2443 is branch into THREAD at a later point. Therefore, labels stop
2444 the search if this is not the `true' thread. */
2446 for (trial = thread;
2447 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2448 trial = next_nonnote_insn (trial))
2450 rtx pat, old_trial;
2452 /* If we have passed a label, we no longer own this thread. */
2453 if (LABEL_P (trial))
2455 own_thread = 0;
2456 continue;
2459 pat = PATTERN (trial);
2460 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2461 continue;
2463 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2464 don't separate or copy insns that set and use CC0. */
2465 if (! insn_references_resource_p (trial, &set, true)
2466 && ! insn_sets_resource_p (trial, &set, true)
2467 && ! insn_sets_resource_p (trial, &needed, true)
2468 #ifdef HAVE_cc0
2469 && ! (reg_mentioned_p (cc0_rtx, pat)
2470 && (! own_thread || ! sets_cc0_p (pat)))
2471 #endif
2472 && ! can_throw_internal (trial))
2474 rtx prior_insn;
2476 /* If TRIAL is redundant with some insn before INSN, we don't
2477 actually need to add it to the delay list; we can merely pretend
2478 we did. */
2479 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2481 fix_reg_dead_note (prior_insn, insn);
2482 if (own_thread)
2484 update_block (trial, thread);
2485 if (trial == thread)
2487 thread = next_active_insn (thread);
2488 if (new_thread == trial)
2489 new_thread = thread;
2492 delete_related_insns (trial);
2494 else
2496 update_reg_unused_notes (prior_insn, trial);
2497 new_thread = next_active_insn (trial);
2500 continue;
2503 /* There are two ways we can win: If TRIAL doesn't set anything
2504 needed at the opposite thread and can't trap, or if it can
2505 go into an annulled delay slot. But we want neither to copy
2506 nor to speculate frame-related insns. */
2507 if (!must_annul
2508 && ((condition == const_true_rtx
2509 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2510 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2511 && ! may_trap_or_fault_p (pat)
2512 && ! RTX_FRAME_RELATED_P (trial))))
2514 old_trial = trial;
2515 trial = try_split (pat, trial, 0);
2516 if (new_thread == old_trial)
2517 new_thread = trial;
2518 if (thread == old_trial)
2519 thread = trial;
2520 pat = PATTERN (trial);
2521 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2522 goto winner;
2524 else if (0
2525 #ifdef ANNUL_IFTRUE_SLOTS
2526 || ! thread_if_true
2527 #endif
2528 #ifdef ANNUL_IFFALSE_SLOTS
2529 || thread_if_true
2530 #endif
2533 old_trial = trial;
2534 trial = try_split (pat, trial, 0);
2535 if (new_thread == old_trial)
2536 new_thread = trial;
2537 if (thread == old_trial)
2538 thread = trial;
2539 pat = PATTERN (trial);
2540 if ((must_annul || delay_list == NULL) && (thread_if_true
2541 ? check_annul_list_true_false (0, delay_list)
2542 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2543 : check_annul_list_true_false (1, delay_list)
2544 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2546 rtx_insn *temp;
2548 must_annul = 1;
2549 winner:
2551 #ifdef HAVE_cc0
2552 if (reg_mentioned_p (cc0_rtx, pat))
2553 link_cc0_insns (trial);
2554 #endif
2556 /* If we own this thread, delete the insn. If this is the
2557 destination of a branch, show that a basic block status
2558 may have been updated. In any case, mark the new
2559 starting point of this thread. */
2560 if (own_thread)
2562 rtx note;
2564 update_block (trial, thread);
2565 if (trial == thread)
2567 thread = next_active_insn (thread);
2568 if (new_thread == trial)
2569 new_thread = thread;
2572 /* We are moving this insn, not deleting it. We must
2573 temporarily increment the use count on any referenced
2574 label lest it be deleted by delete_related_insns. */
2575 for (note = REG_NOTES (trial);
2576 note != NULL_RTX;
2577 note = XEXP (note, 1))
2578 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2579 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2581 /* REG_LABEL_OPERAND could be
2582 NOTE_INSN_DELETED_LABEL too. */
2583 if (LABEL_P (XEXP (note, 0)))
2584 LABEL_NUSES (XEXP (note, 0))++;
2585 else
2586 gcc_assert (REG_NOTE_KIND (note)
2587 == REG_LABEL_OPERAND);
2589 if (jump_to_label_p (trial))
2590 LABEL_NUSES (JUMP_LABEL (trial))++;
2592 delete_related_insns (trial);
2594 for (note = REG_NOTES (trial);
2595 note != NULL_RTX;
2596 note = XEXP (note, 1))
2597 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2598 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2600 /* REG_LABEL_OPERAND could be
2601 NOTE_INSN_DELETED_LABEL too. */
2602 if (LABEL_P (XEXP (note, 0)))
2603 LABEL_NUSES (XEXP (note, 0))--;
2604 else
2605 gcc_assert (REG_NOTE_KIND (note)
2606 == REG_LABEL_OPERAND);
2608 if (jump_to_label_p (trial))
2609 LABEL_NUSES (JUMP_LABEL (trial))--;
2611 else
2612 new_thread = next_active_insn (trial);
2614 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2615 if (thread_if_true)
2616 INSN_FROM_TARGET_P (temp) = 1;
2618 delay_list = add_to_delay_list (temp, delay_list);
2620 if (slots_to_fill == ++(*pslots_filled))
2622 /* Even though we have filled all the slots, we
2623 may be branching to a location that has a
2624 redundant insn. Skip any if so. */
2625 while (new_thread && ! own_thread
2626 && ! insn_sets_resource_p (new_thread, &set, true)
2627 && ! insn_sets_resource_p (new_thread, &needed,
2628 true)
2629 && ! insn_references_resource_p (new_thread,
2630 &set, true)
2631 && (prior_insn
2632 = redundant_insn (new_thread, insn,
2633 delay_list)))
2635 /* We know we do not own the thread, so no need
2636 to call update_block and delete_insn. */
2637 fix_reg_dead_note (prior_insn, insn);
2638 update_reg_unused_notes (prior_insn, new_thread);
2639 new_thread = next_active_insn (new_thread);
2641 break;
2644 continue;
2649 /* This insn can't go into a delay slot. */
2650 lose = 1;
2651 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2652 mark_referenced_resources (trial, &needed, true);
2654 /* Ensure we don't put insns between the setting of cc and the comparison
2655 by moving a setting of cc into an earlier delay slot since these insns
2656 could clobber the condition code. */
2657 set.cc = 1;
2659 /* If this insn is a register-register copy and the next insn has
2660 a use of our destination, change it to use our source. That way,
2661 it will become a candidate for our delay slot the next time
2662 through this loop. This case occurs commonly in loops that
2663 scan a list.
2665 We could check for more complex cases than those tested below,
2666 but it doesn't seem worth it. It might also be a good idea to try
2667 to swap the two insns. That might do better.
2669 We can't do this if the next insn modifies our destination, because
2670 that would make the replacement into the insn invalid. We also can't
2671 do this if it modifies our source, because it might be an earlyclobber
2672 operand. This latter test also prevents updating the contents of
2673 a PRE_INC. We also can't do this if there's overlap of source and
2674 destination. Overlap may happen for larger-than-register-size modes. */
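/* For instance (an illustrative sketch with made-up registers):

	TRIAL:	r1 = r2		; register-register copy
	NEXT:	r3 = r1 + 4	; currently uses the copy's destination

   Rewriting NEXT as r3 = r2 + 4 removes its dependence on TRIAL, so on a
   later iteration of this loop NEXT itself may be hoisted past TRIAL and
   into the delay slot.  */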
2676 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2677 && REG_P (SET_SRC (pat))
2678 && REG_P (SET_DEST (pat))
2679 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2681 rtx next = next_nonnote_insn (trial);
2683 if (next && NONJUMP_INSN_P (next)
2684 && GET_CODE (PATTERN (next)) != USE
2685 && ! reg_set_p (SET_DEST (pat), next)
2686 && ! reg_set_p (SET_SRC (pat), next)
2687 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2688 && ! modified_in_p (SET_DEST (pat), next))
2689 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2693 /* If we stopped on a branch insn that has delay slots, see if we can
2694 steal some of the insns in those slots. */
2695 if (trial && NONJUMP_INSN_P (trial)
2696 && GET_CODE (PATTERN (trial)) == SEQUENCE
2697 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2699 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2700 /* If this is the `true' thread, we will want to follow the jump,
2701 so we can only do this if we have taken everything up to here. */
2702 if (thread_if_true && trial == new_thread)
2704 delay_list
2705 = steal_delay_list_from_target (insn, condition, sequence,
2706 delay_list, &set, &needed,
2707 &opposite_needed, slots_to_fill,
2708 pslots_filled, &must_annul,
2709 &new_thread);
2710 /* If we owned the thread and are told that it branched
2711 elsewhere, make sure we own the thread at the new location. */
2712 if (own_thread && trial != new_thread)
2713 own_thread = own_thread_p (new_thread, new_thread, 0);
2715 else if (! thread_if_true)
2716 delay_list
2717 = steal_delay_list_from_fallthrough (insn, condition,
2718 sequence,
2719 delay_list, &set, &needed,
2720 &opposite_needed, slots_to_fill,
2721 pslots_filled, &must_annul);
2724 /* If we haven't found anything for this delay slot and it is very
2725 likely that the branch will be taken, see if the insn at our target
2726 increments or decrements a register with an increment that does not
2727 depend on the destination register. If so, try to place the opposite
2728 arithmetic insn after the jump insn and put the arithmetic insn in the
2729 delay slot. If we can't do this, return. */
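/* A rough sketch of the transformation (made-up registers and addend,
   assuming a non-annulled slot that executes on both paths):

	    bne  L1			    bne  L1'
	     nop		-->	     add  r4,r4,8	; taken from the target
	    ...				    sub  r4,r4,8	; undoes it on fall-through
	L1: add  r4,r4,8		    ...
					L1': ...

   The branch is redirected past the original increment, and the opposite
   arithmetic insn emitted after the branch cancels the slot's effect when
   the branch is not taken.  (If we own the target thread, the original
   increment is simply deleted instead of being branched around.)  */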
2730 if (delay_list == 0 && likely
2731 && new_thread && !ANY_RETURN_P (new_thread)
2732 && NONJUMP_INSN_P (new_thread)
2733 && !RTX_FRAME_RELATED_P (new_thread)
2734 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2735 && asm_noperands (PATTERN (new_thread)) < 0)
2737 rtx pat = PATTERN (new_thread);
2738 rtx dest;
2739 rtx src;
2741 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2742 above. */
2743 trial = as_a <rtx_insn *> (new_thread);
2744 pat = PATTERN (trial);
2746 if (!NONJUMP_INSN_P (trial)
2747 || GET_CODE (pat) != SET
2748 || ! eligible_for_delay (insn, 0, trial, flags)
2749 || can_throw_internal (trial))
2750 return 0;
2752 dest = SET_DEST (pat), src = SET_SRC (pat);
2753 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2754 && rtx_equal_p (XEXP (src, 0), dest)
2755 && (!FLOAT_MODE_P (GET_MODE (src))
2756 || flag_unsafe_math_optimizations)
2757 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2758 && ! side_effects_p (pat))
2760 rtx other = XEXP (src, 1);
2761 rtx new_arith;
2762 rtx_insn *ninsn;
2764 /* If this is a constant adjustment, use the same code with
2765 the negated constant. Otherwise, reverse the sense of the
2766 arithmetic. */
2767 if (CONST_INT_P (other))
2768 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2769 negate_rtx (GET_MODE (src), other));
2770 else
2771 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2772 GET_MODE (src), dest, other);
2774 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2775 insn);
2777 if (recog_memoized (ninsn) < 0
2778 || (extract_insn (ninsn),
2779 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2781 delete_related_insns (ninsn);
2782 return 0;
2785 if (own_thread)
2787 update_block (trial, thread);
2788 if (trial == thread)
2790 thread = next_active_insn (thread);
2791 if (new_thread == trial)
2792 new_thread = thread;
2794 delete_related_insns (trial);
2796 else
2797 new_thread = next_active_insn (trial);
2799 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2800 if (thread_if_true)
2801 INSN_FROM_TARGET_P (ninsn) = 1;
2803 delay_list = add_to_delay_list (ninsn, NULL);
2804 (*pslots_filled)++;
2808 if (delay_list && must_annul)
2809 INSN_ANNULLED_BRANCH_P (insn) = 1;
2811 /* If we are to branch into the middle of this thread, find an appropriate
2812 label or make a new one if none, and redirect INSN to it. If we hit the
2813 end of the function, use the end-of-function label. */
2814 if (new_thread != thread)
2816 rtx label;
2817 bool crossing = false;
2819 gcc_assert (thread_if_true);
2821 if (new_thread && simplejump_or_return_p (new_thread)
2822 && redirect_with_delay_list_safe_p (insn,
2823 JUMP_LABEL (new_thread),
2824 delay_list))
2825 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2826 &crossing);
2828 if (ANY_RETURN_P (new_thread))
2829 label = find_end_label (new_thread);
2830 else if (LABEL_P (new_thread))
2831 label = new_thread;
2832 else
2833 label = get_label_before (as_a <rtx_insn *> (new_thread),
2834 JUMP_LABEL (insn));
2836 if (label)
2838 reorg_redirect_jump (insn, label);
2839 if (crossing)
2840 CROSSING_JUMP_P (insn) = 1;
2844 return delay_list;
2847 /* Make another attempt to find insns to place in delay slots.
2849 We previously looked for insns located in front of the delay insn
2850 and, for non-jump delay insns, located behind the delay insn.
2852 Here only try to schedule jump insns and try to move insns from either
2853 the target or the following insns into the delay slot. If annulling is
2854 supported, we will likely be able to do this. Otherwise, we can do this only
2855 if safe. */
2857 static void
2858 fill_eager_delay_slots (void)
2860 rtx_insn *insn;
2861 int i;
2862 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2864 for (i = 0; i < num_unfilled_slots; i++)
2866 rtx condition;
2867 rtx target_label, insn_at_target;
2868 rtx_insn *fallthrough_insn;
2869 rtx_insn_list *delay_list = 0;
2870 int own_target;
2871 int own_fallthrough;
2872 int prediction, slots_to_fill, slots_filled;
2874 insn = unfilled_slots_base[i];
2875 if (insn == 0
2876 || insn->deleted ()
2877 || !JUMP_P (insn)
2878 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2879 continue;
2881 slots_to_fill = num_delay_slots (insn);
2882 Some machine descriptions have defined instructions to have
2883 delay slots only in certain circumstances which may depend on
2884 nearby insns (which change due to reorg's actions).
2886 For example, the PA port normally has delay slots for unconditional
2887 jumps.
2889 However, the PA port claims such jumps do not have a delay slot
2890 if they are immediate successors of certain CALL_INSNs. This
2891 allows the port to favor filling the delay slot of the call with
2892 the unconditional jump. */
2893 if (slots_to_fill == 0)
2894 continue;
2896 slots_filled = 0;
2897 target_label = JUMP_LABEL (insn);
2898 condition = get_branch_condition (insn, target_label);
2900 if (condition == 0)
2901 continue;
2903 /* Get the next active fallthrough and target insns and see if we own
2904 them. Then see whether the branch is likely true. We don't need
2905 to do a lot of this for unconditional branches. */
2907 insn_at_target = first_active_target_insn (target_label);
2908 own_target = own_thread_p (target_label, target_label, 0);
2910 if (condition == const_true_rtx)
2912 own_fallthrough = 0;
2913 fallthrough_insn = 0;
2914 prediction = 2;
2916 else
2918 fallthrough_insn = next_active_insn (insn);
2919 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2920 prediction = mostly_true_jump (insn);
2923 /* If this insn is expected to branch, first try to get insns from our
2924 target, then our fallthrough insns. If it is not expected to branch,
2925 try the other order. */
2927 if (prediction > 0)
2929 delay_list
2930 = fill_slots_from_thread (insn, condition, insn_at_target,
2931 fallthrough_insn, prediction == 2, 1,
2932 own_target,
2933 slots_to_fill, &slots_filled, delay_list);
2935 if (delay_list == 0 && own_fallthrough)
2937 /* Even though we didn't find anything for delay slots,
2938 we might have found a redundant insn which we deleted
2939 from the thread that was filled. So we have to recompute
2940 the next insn at the target. */
2941 target_label = JUMP_LABEL (insn);
2942 insn_at_target = first_active_target_insn (target_label);
2944 delay_list
2945 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2946 insn_at_target, 0, 0,
2947 own_fallthrough,
2948 slots_to_fill, &slots_filled,
2949 delay_list);
2952 else
2954 if (own_fallthrough)
2955 delay_list
2956 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2957 insn_at_target, 0, 0,
2958 own_fallthrough,
2959 slots_to_fill, &slots_filled,
2960 delay_list);
2962 if (delay_list == 0)
2963 delay_list
2964 = fill_slots_from_thread (insn, condition, insn_at_target,
2965 next_active_insn (insn), 0, 1,
2966 own_target,
2967 slots_to_fill, &slots_filled,
2968 delay_list);
2971 if (delay_list)
2972 unfilled_slots_base[i]
2973 = emit_delay_sequence (insn, delay_list, slots_filled);
2975 if (slots_to_fill == slots_filled)
2976 unfilled_slots_base[i] = 0;
2978 note_delay_statistics (slots_filled, 1);
2982 static void delete_computation (rtx insn);
2984 /* Recursively delete prior insns that compute the value (used only by INSN
2985 which the caller is deleting) stored in the register mentioned by NOTE
2986 which is a REG_DEAD note associated with INSN. */
2988 static void
2989 delete_prior_computation (rtx note, rtx insn)
2991 rtx our_prev;
2992 rtx reg = XEXP (note, 0);
2994 for (our_prev = prev_nonnote_insn (insn);
2995 our_prev && (NONJUMP_INSN_P (our_prev)
2996 || CALL_P (our_prev));
2997 our_prev = prev_nonnote_insn (our_prev))
2999 rtx pat = PATTERN (our_prev);
3001 /* If we reach a CALL which is not calling a const function
3002 or the callee pops the arguments, then give up. */
3003 if (CALL_P (our_prev)
3004 && (! RTL_CONST_CALL_P (our_prev)
3005 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
3006 break;
3008 /* If we reach a SEQUENCE, it is too complex to try to
3009 do anything with it, so give up. We can be run during
3010 and after reorg, so SEQUENCE rtl can legitimately show
3011 up here. */
3012 if (GET_CODE (pat) == SEQUENCE)
3013 break;
3015 if (GET_CODE (pat) == USE
3016 && NONJUMP_INSN_P (XEXP (pat, 0)))
3017 /* reorg creates USEs that look like this. We leave them
3018 alone because reorg needs them for its own purposes. */
3019 break;
3021 if (reg_set_p (reg, pat))
3023 if (side_effects_p (pat) && !CALL_P (our_prev))
3024 break;
3026 if (GET_CODE (pat) == PARALLEL)
3028 /* If we find a SET of something else, we can't
3029 delete the insn. */
3031 int i;
3033 for (i = 0; i < XVECLEN (pat, 0); i++)
3035 rtx part = XVECEXP (pat, 0, i);
3037 if (GET_CODE (part) == SET
3038 && SET_DEST (part) != reg)
3039 break;
3042 if (i == XVECLEN (pat, 0))
3043 delete_computation (our_prev);
3045 else if (GET_CODE (pat) == SET
3046 && REG_P (SET_DEST (pat)))
3048 int dest_regno = REGNO (SET_DEST (pat));
3049 int dest_endregno = END_REGNO (SET_DEST (pat));
3050 int regno = REGNO (reg);
3051 int endregno = END_REGNO (reg);
3053 if (dest_regno >= regno
3054 && dest_endregno <= endregno)
3055 delete_computation (our_prev);
3057 /* We may have a multi-word hard register and some, but not
3058 all, of the words of the register are needed in subsequent
3059 insns. Write REG_UNUSED notes for those parts that were not
3060 needed. */
3061 else if (dest_regno <= regno
3062 && dest_endregno >= endregno)
3064 int i;
3066 add_reg_note (our_prev, REG_UNUSED, reg);
3068 for (i = dest_regno; i < dest_endregno; i++)
3069 if (! find_regno_note (our_prev, REG_UNUSED, i))
3070 break;
3072 if (i == dest_endregno)
3073 delete_computation (our_prev);
3077 break;
3080 /* If PAT references the register that dies here, it is an
3081 additional use. Hence any prior SET isn't dead. However, this
3082 insn becomes the new place for the REG_DEAD note. */
3083 if (reg_overlap_mentioned_p (reg, pat))
3085 XEXP (note, 1) = REG_NOTES (our_prev);
3086 REG_NOTES (our_prev) = note;
3087 break;
3092 /* Delete INSN and recursively delete insns that compute values used only
3093 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3095 Look at all our REG_DEAD notes. If a previous insn does nothing other
3096 than set a register that dies in this insn, we can delete that insn
3097 as well.
3099 On machines with CC0, if CC0 is used in this insn, we may be able to
3100 delete the insn that set it. */
3102 static void
3103 delete_computation (rtx insn)
3105 rtx note, next;
3107 #ifdef HAVE_cc0
3108 if (reg_referenced_p (cc0_rtx, PATTERN (insn)))
3110 rtx prev = prev_nonnote_insn (insn);
3111 /* We assume that at this stage
3112 CC's are always set explicitly
3113 and always immediately before the jump that
3114 will use them. So if the previous insn
3115 exists to set the CC's, delete it
3116 (unless it performs auto-increments, etc.). */
3117 if (prev && NONJUMP_INSN_P (prev)
3118 && sets_cc0_p (PATTERN (prev)))
3120 if (sets_cc0_p (PATTERN (prev)) > 0
3121 && ! side_effects_p (PATTERN (prev)))
3122 delete_computation (prev);
3123 else
3124 /* Otherwise, show that cc0 won't be used. */
3125 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3128 #endif
3130 for (note = REG_NOTES (insn); note; note = next)
3132 next = XEXP (note, 1);
3134 if (REG_NOTE_KIND (note) != REG_DEAD
3135 /* Verify that the REG_NOTE is legitimate. */
3136 || !REG_P (XEXP (note, 0)))
3137 continue;
3139 delete_prior_computation (note, insn);
3142 delete_related_insns (insn);
3145 /* If all INSN does is set the pc, delete it,
3146 and delete the insn that set the condition codes for it
3147 if that's what the previous thing was. */
3149 static void
3150 delete_jump (rtx_insn *insn)
3152 rtx set = single_set (insn);
3154 if (set && GET_CODE (SET_DEST (set)) == PC)
3155 delete_computation (insn);
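/* Scan backwards from the next active insn after X for a CODE_LABEL,
   stopping without success if SCAN_LIMIT (or the start of the insn chain)
   is reached first; return the label found, or NULL.  */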
3158 static rtx_insn *
3159 label_before_next_insn (rtx x, rtx scan_limit)
3161 rtx_insn *insn = next_active_insn (x);
3162 while (insn)
3164 insn = PREV_INSN (insn);
3165 if (insn == scan_limit || insn == NULL_RTX)
3166 return NULL;
3167 if (LABEL_P (insn))
3168 break;
3170 return insn;
3174 /* Once we have tried two ways to fill a delay slot, make a pass over the
3175 code to try to improve the results and to do such things as more jump
3176 threading. */
3178 static void
3179 relax_delay_slots (rtx_insn *first)
3181 rtx_insn *insn, *next;
3182 rtx_sequence *pat;
3183 rtx trial;
3184 rtx_insn *delay_insn;
3185 rtx target_label;
3187 /* Look at every JUMP_INSN and see if we can improve it. */
3188 for (insn = first; insn; insn = next)
3190 rtx_insn *other;
3191 bool crossing;
3193 next = next_active_insn (insn);
3195 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3196 the next insn, or jumps to a label that is not the last of a
3197 group of consecutive labels. */
3198 if (JUMP_P (insn)
3199 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3200 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3202 target_label
3203 = skip_consecutive_labels (follow_jumps (target_label, insn,
3204 &crossing));
3205 if (ANY_RETURN_P (target_label))
3206 target_label = find_end_label (target_label);
3208 if (target_label && next_active_insn (target_label) == next
3209 && ! condjump_in_parallel_p (insn))
3211 delete_jump (insn);
3212 continue;
3215 if (target_label && target_label != JUMP_LABEL (insn))
3217 reorg_redirect_jump (insn, target_label);
3218 if (crossing)
3219 CROSSING_JUMP_P (insn) = 1;
3222 /* See if this jump conditionally branches around an unconditional
3223 jump. If so, invert this jump and point it to the target of the
3224 second jump. */
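/* i.e. (an illustrative sketch with made-up mnemonics):

	    beq  L1			    bne  L2
	    jmp  L2		-->	L1: ...
	L1: ...

   assuming the inverted condition is expressible and L1 immediately
   follows the unconditional jump as described above.  */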
3225 if (next && simplejump_or_return_p (next)
3226 && any_condjump_p (insn)
3227 && target_label
3228 && next_active_insn (target_label) == next_active_insn (next)
3229 && no_labels_between_p (insn, next))
3231 rtx label = JUMP_LABEL (next);
3233 /* Be careful how we do this to avoid deleting code or
3234 labels that are momentarily dead. See similar optimization
3235 in jump.c.
3237 We also need to ensure we properly handle the case when
3238 invert_jump fails. */
3240 ++LABEL_NUSES (target_label);
3241 if (!ANY_RETURN_P (label))
3242 ++LABEL_NUSES (label);
3244 if (invert_jump (insn, label, 1))
3246 delete_related_insns (next);
3247 next = insn;
3250 if (!ANY_RETURN_P (label))
3251 --LABEL_NUSES (label);
3253 if (--LABEL_NUSES (target_label) == 0)
3254 delete_related_insns (target_label);
3256 continue;
3260 /* If this is an unconditional jump and the previous insn is a
3261 conditional jump, try reversing the condition of the previous
3262 insn and swapping our targets. The next pass might be able to
3263 fill the slots.
3265 Don't do this if we expect the conditional branch to be true, because
3266 we would then be making the more common case longer. */
3268 if (simplejump_or_return_p (insn)
3269 && (other = prev_active_insn (insn)) != 0
3270 && any_condjump_p (other)
3271 && no_labels_between_p (other, insn)
3272 && 0 > mostly_true_jump (other))
3274 rtx other_target = JUMP_LABEL (other);
3275 target_label = JUMP_LABEL (insn);
3277 if (invert_jump (other, target_label, 0))
3278 reorg_redirect_jump (insn, other_target);
3281 /* Now look only at cases where we have a filled delay slot. */
3282 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3283 continue;
3285 pat = as_a <rtx_sequence *> (PATTERN (insn));
3286 delay_insn = pat->insn (0);
3288 /* See if the first insn in the delay slot is redundant with some
3289 previous insn. Remove it from the delay slot if so; then set up
3290 to reprocess this insn. */
3291 if (redundant_insn (pat->insn (1), delay_insn, 0))
3293 update_block (pat->insn (1), insn);
3294 delete_from_delay_slot (pat->insn (1));
3295 next = prev_active_insn (next);
3296 continue;
3299 /* See if we have a RETURN insn with a filled delay slot followed
3300 by a RETURN insn with an unfilled delay slot. If so, we can delete
3301 the first RETURN (but not its delay insn). This gives the same
3302 effect in fewer instructions.
3304 Only do so if optimizing for size since this results in slower, but
3305 smaller code. */
3306 if (optimize_function_for_size_p (cfun)
3307 && ANY_RETURN_P (PATTERN (delay_insn))
3308 && next
3309 && JUMP_P (next)
3310 && PATTERN (next) == PATTERN (delay_insn))
3312 rtx_insn *after;
3313 int i;
3315 /* Delete the RETURN and just execute the delay list insns.
3317 We do this by deleting the INSN containing the SEQUENCE, then
3318 re-emitting the insns separately, and then deleting the RETURN.
3319 This allows the count of the jump target to be properly
3320 decremented.
3322 Note that we need to change the INSN_UID of the re-emitted insns
3323 since it is used to hash the insns for mark_target_live_regs and
3324 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3326 Clear the from target bit, since these insns are no longer
3327 in delay slots. */
3328 for (i = 0; i < XVECLEN (pat, 0); i++)
3329 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3331 trial = PREV_INSN (insn);
3332 delete_related_insns (insn);
3333 gcc_assert (GET_CODE (pat) == SEQUENCE);
3334 add_insn_after (delay_insn, trial, NULL);
3335 after = delay_insn;
3336 for (i = 1; i < pat->len (); i++)
3337 after = emit_copy_of_insn_after (pat->insn (i), after);
3338 delete_scheduled_jump (delay_insn);
3339 continue;
3342 /* Now look only at the cases where we have a filled JUMP_INSN. */
3343 if (!JUMP_P (delay_insn)
3344 || !(condjump_p (delay_insn) || condjump_in_parallel_p (delay_insn)))
3345 continue;
3347 target_label = JUMP_LABEL (delay_insn);
3348 if (target_label && ANY_RETURN_P (target_label))
3349 continue;
3351 /* If this jump goes to another unconditional jump, thread it, but
3352 don't convert a jump into a RETURN here. */
3353 trial = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
3354 &crossing));
3355 if (ANY_RETURN_P (trial))
3356 trial = find_end_label (trial);
3358 if (trial && trial != target_label
3359 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3361 reorg_redirect_jump (delay_insn, trial);
3362 target_label = trial;
3363 if (crossing)
3364 CROSSING_JUMP_P (insn) = 1;
3367 /* If the first insn at TARGET_LABEL is redundant with a previous
3368 insn, redirect the jump to the following insn and process again.
3369 We use next_real_insn instead of next_active_insn so we
3370 don't skip USE-markers, or we'll end up with incorrect
3371 liveness info. */
3372 trial = next_real_insn (target_label);
3373 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3374 && redundant_insn (trial, insn, 0)
3375 && ! can_throw_internal (trial))
3377 /* Figure out where to emit the special USE insn so we don't
3378 later incorrectly compute register live/death info. */
3379 rtx_insn *tmp = next_active_insn (trial);
3380 if (tmp == 0)
3381 tmp = find_end_label (simple_return_rtx);
3383 if (tmp)
3385 /* Insert the special USE insn and update dataflow info.
3386 We know "trial" is an insn here as it is the output of
3387 next_real_insn () above. */
3388 update_block (as_a <rtx_insn *> (trial), tmp);
3390 /* Now emit a label before the special USE insn, and
3391 redirect our jump to the new label. */
3392 target_label = get_label_before (PREV_INSN (tmp), target_label);
3393 reorg_redirect_jump (delay_insn, target_label);
3394 next = insn;
3395 continue;
3399 /* Similarly, if it is an unconditional jump with one insn in its
3400 delay list and that insn is redundant, thread the jump. */
3401 rtx_sequence *trial_seq =
3402 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3403 if (trial_seq
3404 && trial_seq->len () == 2
3405 && JUMP_P (trial_seq->insn (0))
3406 && simplejump_or_return_p (trial_seq->insn (0))
3407 && redundant_insn (trial_seq->insn (1), insn, 0))
3409 target_label = JUMP_LABEL (trial_seq->insn (0));
3410 if (ANY_RETURN_P (target_label))
3411 target_label = find_end_label (target_label);
3413 if (target_label
3414 && redirect_with_delay_slots_safe_p (delay_insn, target_label,
3415 insn))
3417 update_block (trial_seq->insn (1), insn);
3418 reorg_redirect_jump (delay_insn, target_label);
3419 next = insn;
3420 continue;
3424 /* See if we have a simple (conditional) jump that is useless. */
3425 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3426 && ! condjump_in_parallel_p (delay_insn)
3427 && prev_active_insn (target_label) == insn
3428 && ! BARRIER_P (prev_nonnote_insn (target_label))
3429 #ifdef HAVE_cc0
3430 /* If the last insn in the delay slot sets CC0 for some insn,
3431 various code assumes that it is in a delay slot. We could
3432 put it back where it belonged and delete the register notes,
3433 but it doesn't seem worthwhile in this uncommon case. */
3434 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3435 REG_CC_USER, NULL_RTX)
3436 #endif
3439 rtx_insn *after;
3440 int i;
3442 /* All this insn does is execute its delay list and jump to the
3443 following insn. So delete the jump and just execute the delay
3444 list insns.
3446 We do this by deleting the INSN containing the SEQUENCE, then
3447 re-emitting the insns separately, and then deleting the jump.
3448 This allows the count of the jump target to be properly
3449 decremented.
3451 Note that we need to change the INSN_UID of the re-emitted insns
3452 since it is used to hash the insns for mark_target_live_regs and
3453 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3455 Clear the from target bit, since these insns are no longer
3456 in delay slots. */
3457 for (i = 0; i < XVECLEN (pat, 0); i++)
3458 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3460 trial = PREV_INSN (insn);
3461 delete_related_insns (insn);
3462 gcc_assert (GET_CODE (pat) == SEQUENCE);
3463 add_insn_after (delay_insn, trial, NULL);
3464 after = delay_insn;
3465 for (i = 1; i < pat->len (); i++)
3466 after = emit_copy_of_insn_after (pat->insn (i), after);
3467 delete_scheduled_jump (delay_insn);
3468 continue;
3471 /* See if this is an unconditional jump around a single insn which is
3472 identical to the one in its delay slot. In this case, we can just
3473 delete the branch and the insn in its delay slot. */
3474 if (next && NONJUMP_INSN_P (next)
3475 && label_before_next_insn (next, insn) == target_label
3476 && simplejump_p (insn)
3477 && XVECLEN (pat, 0) == 2
3478 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3480 delete_related_insns (insn);
3481 continue;
3484 /* See if this jump (with its delay slots) conditionally branches
3485 around an unconditional jump (without delay slots). If so, invert
3486 this jump and point it to the target of the second jump. We cannot
3487 do this for annulled jumps, though. Again, don't convert a jump to
3488 a RETURN here. */
3489 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3490 && any_condjump_p (delay_insn)
3491 && next && simplejump_or_return_p (next)
3492 && next_active_insn (target_label) == next_active_insn (next)
3493 && no_labels_between_p (insn, next))
3495 rtx label = JUMP_LABEL (next);
3496 rtx old_label = JUMP_LABEL (delay_insn);
3498 if (ANY_RETURN_P (label))
3499 label = find_end_label (label);
3501 /* find_end_label can generate a new label. Check this first. */
3502 if (label
3503 && no_labels_between_p (insn, next)
3504 && redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3506 /* Be careful how we do this to avoid deleting code or labels
3507 that are momentarily dead. See similar optimization in
3508 jump.c */
3509 if (old_label)
3510 ++LABEL_NUSES (old_label);
3512 if (invert_jump (delay_insn, label, 1))
3514 int i;
3516 /* Must update the INSN_FROM_TARGET_P bits now that
3517 the branch is reversed, so that mark_target_live_regs
3518 will handle the delay slot insn correctly. */
3519 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3521 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3522 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3525 delete_related_insns (next);
3526 next = insn;
3529 if (old_label && --LABEL_NUSES (old_label) == 0)
3530 delete_related_insns (old_label);
3531 continue;
3535 /* If we own the thread opposite the way this insn branches, see if we
3536 can merge its delay slots with following insns. */
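/* Roughly: if the slot insn was taken from the branch target, the thread
   of interest is the fall-through code after INSN; otherwise it is the
   code at TARGET_LABEL.  In either case merging is only attempted when we
   own that thread, i.e. nothing else can reach it except through this
   insn.  */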
3537 if (INSN_FROM_TARGET_P (pat->insn (1))
3538 && own_thread_p (NEXT_INSN (insn), 0, 1))
3539 try_merge_delay_insns (insn, next);
3540 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3541 && own_thread_p (target_label, target_label, 0))
3542 try_merge_delay_insns (insn, next_active_insn (target_label));
3544 /* If we get here, we haven't deleted INSN. But we may have deleted
3545 NEXT, so recompute it. */
3546 next = next_active_insn (insn);
3551 /* Look for filled jumps to the end of function label. We can try to convert
3552 them into RETURN insns if the insns in the delay slot are valid for the
3553 RETURN as well. */
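/* Roughly, the rewrite attempted here is
       jump to <end-of-function label>  [delay slots]
   becoming
       RETURN  [same delay slots]
   when the target accepts those insns in a RETURN's delay slots;
   otherwise the jump is simply redirected to a real RETURN.  */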
3555 static void
3556 make_return_insns (rtx_insn *first)
3558 rtx_insn *insn;
3559 rtx_insn *jump_insn;
3560 rtx real_return_label = function_return_label;
3561 rtx real_simple_return_label = function_simple_return_label;
3562 int slots, i;
3564 /* See if there is a RETURN insn in the function other than the one we
3565 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3566 into a RETURN to jump to it. */
3567 for (insn = first; insn; insn = NEXT_INSN (insn))
3568 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3570 rtx t = get_label_before (insn, NULL_RTX);
3571 if (PATTERN (insn) == ret_rtx)
3572 real_return_label = t;
3573 else
3574 real_simple_return_label = t;
3575 break;
3578 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3579 was equal to END_OF_FUNCTION_LABEL. */
3580 if (real_return_label)
3581 LABEL_NUSES (real_return_label)++;
3582 if (real_simple_return_label)
3583 LABEL_NUSES (real_simple_return_label)++;
3585 /* Clear the list of insns to fill so we can use it. */
3586 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3588 for (insn = first; insn; insn = NEXT_INSN (insn))
3590 int flags;
3591 rtx kind, real_label;
3593 /* Only look at filled JUMP_INSNs that go to the end of function
3594 label. */
3595 if (!NONJUMP_INSN_P (insn))
3596 continue;
3598 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3599 continue;
3601 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3603 if (!jump_to_label_p (pat->insn (0)))
3604 continue;
3606 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3608 kind = ret_rtx;
3609 real_label = real_return_label;
3611 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3613 kind = simple_return_rtx;
3614 real_label = real_simple_return_label;
3616 else
3617 continue;
3619 jump_insn = pat->insn (0);
3621 /* If we can't make the jump into a RETURN, try to redirect it to the best
3622 RETURN and go on to the next insn. */
3623 if (!reorg_redirect_jump (jump_insn, kind))
3625 /* Make sure redirecting the jump will not invalidate the delay
3626 slot insns. */
3627 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3628 reorg_redirect_jump (jump_insn, real_label);
3629 continue;
3632 /* See if this RETURN can accept the insns currently in its delay slot.
3633 It can if it has more or an equal number of slots and the contents
3634 of each is valid. */
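/* Sketch of the test below: every filled slot must satisfy the target's
   eligibility check for the new RETURN; for annulled branches the check
   used depends on whether the slot insn came from the branch target
   (annul-false) or from the fall-through path (annul-true).  */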
3636 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3637 slots = num_delay_slots (jump_insn);
3638 if (slots >= XVECLEN (pat, 0) - 1)
3640 for (i = 1; i < XVECLEN (pat, 0); i++)
3641 if (! (
3642 #ifdef ANNUL_IFFALSE_SLOTS
3643 (INSN_ANNULLED_BRANCH_P (jump_insn)
3644 && INSN_FROM_TARGET_P (pat->insn (i)))
3645 ? eligible_for_annul_false (jump_insn, i - 1,
3646 pat->insn (i), flags) :
3647 #endif
3648 #ifdef ANNUL_IFTRUE_SLOTS
3649 (INSN_ANNULLED_BRANCH_P (jump_insn)
3650 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3651 ? eligible_for_annul_true (jump_insn, i - 1,
3652 pat->insn (i), flags) :
3653 #endif
3654 eligible_for_delay (jump_insn, i - 1,
3655 pat->insn (i), flags)))
3656 break;
3658 else
3659 i = 0;
3661 if (i == XVECLEN (pat, 0))
3662 continue;
3664 /* We have to do something with this insn. If it is an unconditional
3665 RETURN, delete the SEQUENCE and output the individual insns,
3666 followed by the RETURN. Then set things up so we try to find
3667 insns for its delay slots, if it needs some. */
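/* Sketch:  SEQUENCE [jump-to-end, d1 ... dn]  becomes
       d1 ... dn ; RETURN ; barrier
   and, if the RETURN itself has delay slots, it is queued on
   unfilled_slots_obstack so a later fill pass can try again.  */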
3668 if (ANY_RETURN_P (PATTERN (jump_insn)))
3670 rtx_insn *prev = PREV_INSN (insn);
3672 delete_related_insns (insn);
3673 for (i = 1; i < XVECLEN (pat, 0); i++)
3674 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3676 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3677 emit_barrier_after (insn);
3679 if (slots)
3680 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3682 else
3683 /* It is probably more efficient to keep this with its current
3684 delay slot as a branch to a RETURN. */
3685 reorg_redirect_jump (jump_insn, real_label);
3688 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3689 new delay slots we have created. */
3690 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3691 delete_related_insns (real_return_label);
3692 if (real_simple_return_label != NULL_RTX
3693 && --LABEL_NUSES (real_simple_return_label) == 0)
3694 delete_related_insns (real_simple_return_label);
3696 fill_simple_delay_slots (1);
3697 fill_simple_delay_slots (0);
3700 /* Try to find insns to place in delay slots. */
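/* Outline (sketch): build the INSN_UID -> ordering map, collect every insn
   with delay slots on unfilled_slots_obstack, run up to MAX_REORG_PASSES
   rounds of simple filling, eager filling and relaxation, optionally turn
   jumps to the end-of-function label into RETURNs, then clean up the USE
   insns and resource data used along the way.  */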
3702 static void
3703 dbr_schedule (rtx_insn *first)
3705 rtx_insn *insn, *next, *epilogue_insn = 0;
3706 int i;
3707 bool need_return_insns;
3709 /* If the current function has no insns other than the prologue and
3710 epilogue, then do not try to fill any delay slots. */
3711 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3712 return;
3714 /* Find the highest INSN_UID and allocate and initialize our map from
3715 INSN_UID's to position in code. */
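/* The resulting uid_to_ruid[] gives each insn a dense position number so
   that relative order (e.g. whether a branch goes forward or backward) can
   be decided by comparing integers instead of walking the insn chain.  */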
3716 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3718 if (INSN_UID (insn) > max_uid)
3719 max_uid = INSN_UID (insn);
3720 if (NOTE_P (insn)
3721 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3722 epilogue_insn = insn;
3725 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3726 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3727 uid_to_ruid[INSN_UID (insn)] = i;
3729 /* Initialize the list of insns that need filling. */
3730 if (unfilled_firstobj == 0)
3732 gcc_obstack_init (&unfilled_slots_obstack);
3733 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3736 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3738 rtx target;
3740 /* Skip vector tables. We can't get attributes for them. */
3741 if (JUMP_TABLE_DATA_P (insn))
3742 continue;
3744 if (JUMP_P (insn))
3745 INSN_ANNULLED_BRANCH_P (insn) = 0;
3746 INSN_FROM_TARGET_P (insn) = 0;
3748 if (num_delay_slots (insn) > 0)
3749 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3751 /* Ensure all jumps go to the last of a set of consecutive labels. */
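/* Illustrative sketch: with
    L1:
    L2: insn
   a conditional jump to L1 is redirected to L2, so that later comparisons
   of branch targets against active insns see a single canonical label.  */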
3752 if (JUMP_P (insn)
3753 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3754 && !ANY_RETURN_P (JUMP_LABEL (insn))
3755 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3756 != JUMP_LABEL (insn)))
3757 redirect_jump (insn, target, 1);
3760 init_resource_info (epilogue_insn);
3762 /* Show we haven't computed an end-of-function label yet. */
3763 function_return_label = function_simple_return_label = NULL;
3765 /* Initialize the statistics for this function. */
3766 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3767 memset (num_filled_delays, 0, sizeof num_filled_delays);
3769 /* Now do the delay slot filling. Try everything twice in case earlier
3770 changes make more slots fillable. */
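/* Each round below first fills slots of non-jump insns, then of jumps (the
   two fill_simple_delay_slots calls), then tries the more aggressive eager
   filling, and finally relaxes the resulting jumps; a second round can
   exploit opportunities exposed by the first.  */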
3772 for (reorg_pass_number = 0;
3773 reorg_pass_number < MAX_REORG_PASSES;
3774 reorg_pass_number++)
3776 fill_simple_delay_slots (1);
3777 fill_simple_delay_slots (0);
3778 fill_eager_delay_slots ();
3779 relax_delay_slots (first);
3782 /* If we made an end of function label, indicate that it is now
3783 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3784 If it is now unused, delete it. */
3785 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3786 delete_related_insns (function_return_label);
3787 if (function_simple_return_label
3788 && --LABEL_NUSES (function_simple_return_label) == 0)
3789 delete_related_insns (function_simple_return_label);
3791 need_return_insns = false;
3792 #ifdef HAVE_return
3793 need_return_insns |= HAVE_return && function_return_label != 0;
3794 #endif
3795 #ifdef HAVE_simple_return
3796 need_return_insns |= HAVE_simple_return && function_simple_return_label != 0;
3797 #endif
3798 if (need_return_insns)
3799 make_return_insns (first);
3801 /* Delete any USE insns made by update_block; subsequent passes don't need
3802 them or know how to deal with them. */
3803 for (insn = first; insn; insn = next)
3805 next = NEXT_INSN (insn);
3807 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3808 && INSN_P (XEXP (PATTERN (insn), 0)))
3809 next = delete_related_insns (insn);
3812 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3814 /* It is not clear why the line below is needed, but it does seem to be. */
3815 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3817 if (dump_file)
3819 int i, j, need_comma;
3820 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3821 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3823 for (reorg_pass_number = 0;
3824 reorg_pass_number < MAX_REORG_PASSES;
3825 reorg_pass_number++)
3827 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3828 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3830 need_comma = 0;
3831 fprintf (dump_file, ";; Reorg function #%d\n", i);
3833 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3834 num_insns_needing_delays[i][reorg_pass_number]);
3836 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3837 if (num_filled_delays[i][j][reorg_pass_number])
3839 if (need_comma)
3840 fprintf (dump_file, ", ");
3841 need_comma = 1;
3842 fprintf (dump_file, "%d got %d delays",
3843 num_filled_delays[i][j][reorg_pass_number], j);
3845 fprintf (dump_file, "\n");
3848 memset (total_delay_slots, 0, sizeof total_delay_slots);
3849 memset (total_annul_slots, 0, sizeof total_annul_slots);
3850 for (insn = first; insn; insn = NEXT_INSN (insn))
3852 if (! insn->deleted ()
3853 && NONJUMP_INSN_P (insn)
3854 && GET_CODE (PATTERN (insn)) != USE
3855 && GET_CODE (PATTERN (insn)) != CLOBBER)
3857 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3859 rtx control;
3860 j = XVECLEN (PATTERN (insn), 0) - 1;
3861 if (j > MAX_DELAY_HISTOGRAM)
3862 j = MAX_DELAY_HISTOGRAM;
3863 control = XVECEXP (PATTERN (insn), 0, 0);
3864 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3865 total_annul_slots[j]++;
3866 else
3867 total_delay_slots[j]++;
3869 else if (num_delay_slots (insn) > 0)
3870 total_delay_slots[0]++;
3873 fprintf (dump_file, ";; Reorg totals: ");
3874 need_comma = 0;
3875 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3877 if (total_delay_slots[j])
3879 if (need_comma)
3880 fprintf (dump_file, ", ");
3881 need_comma = 1;
3882 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3885 fprintf (dump_file, "\n");
3886 #if defined (ANNUL_IFTRUE_SLOTS) || defined (ANNUL_IFFALSE_SLOTS)
3887 fprintf (dump_file, ";; Reorg annuls: ");
3888 need_comma = 0;
3889 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3891 if (total_annul_slots[j])
3893 if (need_comma)
3894 fprintf (dump_file, ", ");
3895 need_comma = 1;
3896 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3899 fprintf (dump_file, "\n");
3900 #endif
3901 fprintf (dump_file, "\n");
3904 if (!sibling_labels.is_empty ())
3906 update_alignments (sibling_labels);
3907 sibling_labels.release ();
3910 free_resource_info ();
3911 free (uid_to_ruid);
3912 crtl->dbr_scheduled_p = true;
3914 #endif /* DELAY_SLOTS */
3916 /* Run delay slot optimization. */
3917 static unsigned int
3918 rest_of_handle_delay_slots (void)
3920 #ifdef DELAY_SLOTS
3921 dbr_schedule (get_insns ());
3922 #endif
3923 return 0;
3926 namespace {
3928 const pass_data pass_data_delay_slots =
3930 RTL_PASS, /* type */
3931 "dbr", /* name */
3932 OPTGROUP_NONE, /* optinfo_flags */
3933 TV_DBR_SCHED, /* tv_id */
3934 0, /* properties_required */
3935 0, /* properties_provided */
3936 0, /* properties_destroyed */
3937 0, /* todo_flags_start */
3938 0, /* todo_flags_finish */
3941 class pass_delay_slots : public rtl_opt_pass
3943 public:
3944 pass_delay_slots (gcc::context *ctxt)
3945 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3948 /* opt_pass methods: */
3949 virtual bool gate (function *);
3950 virtual unsigned int execute (function *)
3952 return rest_of_handle_delay_slots ();
3955 }; // class pass_delay_slots
3957 bool
3958 pass_delay_slots::gate (function *)
3960 #ifdef DELAY_SLOTS
3961 /* At -O0 dataflow info isn't updated after RA. */
3962 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3963 #else
3964 return 0;
3965 #endif
3968 } // anon namespace
3970 rtl_opt_pass *
3971 make_pass_delay_slots (gcc::context *ctxt)
3973 return new pass_delay_slots (ctxt);
3976 /* Machine dependent reorg pass. */
3978 namespace {
3980 const pass_data pass_data_machine_reorg =
3982 RTL_PASS, /* type */
3983 "mach", /* name */
3984 OPTGROUP_NONE, /* optinfo_flags */
3985 TV_MACH_DEP, /* tv_id */
3986 0, /* properties_required */
3987 0, /* properties_provided */
3988 0, /* properties_destroyed */
3989 0, /* todo_flags_start */
3990 0, /* todo_flags_finish */
3993 class pass_machine_reorg : public rtl_opt_pass
3995 public:
3996 pass_machine_reorg (gcc::context *ctxt)
3997 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
4000 /* opt_pass methods: */
4001 virtual bool gate (function *)
4003 return targetm.machine_dependent_reorg != 0;
4006 virtual unsigned int execute (function *)
4008 targetm.machine_dependent_reorg ();
4009 return 0;
4012 }; // class pass_machine_reorg
4014 } // anon namespace
4016 rtl_opt_pass *
4017 make_pass_machine_reorg (gcc::context *ctxt)
4019 return new pass_machine_reorg (ctxt);