[official-gcc.git] / gcc / reorg.c
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2014 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
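   As an illustrative sketch only (invented registers and label, not taken
   from any particular port), filling such a slot might turn

	addu  $t0, $t1, $t2
	beq   $a0, $zero, L1
	nop			# unfilled delay slot

   into

	beq   $a0, $zero, L1
	addu  $t0, $t1, $t2	# hoisted from before the branch

   so both useful insns still execute, but the nop is gone.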
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
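   An illustrative sketch of (2) stealing from the guessed-taken target
   (invented insns and labels; annulling branch syntax as in the skip
   example further below):

	(before)			(after)
	bne   L1			bne,a L1'
	 nop				 add  r1,r1,1
	...				...
   L1:	add   r1,r1,1		   L1:	add   r1,r1,1
	insn2			   L1':	insn2

   The copy in the annulled slot executes only when the branch is taken,
   and the branch is retargeted to L1', just past the insn it stole; the
   original at L1 remains because other code may still reach it, which is
   the code expansion mentioned above.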
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
 95     correspondence between the insn that sets CC0 and the insn that uses it.  The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "tm.h"
107 #include "diagnostic-core.h"
108 #include "rtl.h"
109 #include "tm_p.h"
110 #include "expr.h"
111 #include "hashtab.h"
112 #include "hash-set.h"
113 #include "vec.h"
114 #include "machmode.h"
115 #include "hard-reg-set.h"
116 #include "input.h"
117 #include "function.h"
118 #include "insn-config.h"
119 #include "conditions.h"
120 #include "predict.h"
121 #include "dominance.h"
122 #include "cfg.h"
123 #include "basic-block.h"
124 #include "regs.h"
125 #include "recog.h"
126 #include "flags.h"
127 #include "obstack.h"
128 #include "insn-attr.h"
129 #include "resource.h"
130 #include "except.h"
131 #include "params.h"
132 #include "target.h"
133 #include "tree-pass.h"
134 #include "emit-rtl.h"
136 #ifdef DELAY_SLOTS
138 #ifndef ANNUL_IFTRUE_SLOTS
139 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
140 #endif
141 #ifndef ANNUL_IFFALSE_SLOTS
142 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
143 #endif
146 /* First, some functions that were used before GCC got a control flow graph.
147 These functions are now only used here in reorg.c, and have therefore
148 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
150 /* Return the last label to mark the same position as LABEL. Return LABEL
151 itself if it is null or any return rtx. */
153 static rtx
154 skip_consecutive_labels (rtx label_or_return)
156 rtx_insn *insn;
158 if (label_or_return && ANY_RETURN_P (label_or_return))
159 return label_or_return;
161 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
163 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
164 if (LABEL_P (insn))
165 label = insn;
167 return label;
170 #ifdef HAVE_cc0
171 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
172 and REG_CC_USER notes so we can find it. */
174 static void
175 link_cc0_insns (rtx insn)
177 rtx user = next_nonnote_insn (insn);
179 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
180 user = XVECEXP (PATTERN (user), 0, 0);
182 add_reg_note (user, REG_CC_SETTER, insn);
183 add_reg_note (insn, REG_CC_USER, user);
185 #endif
187 /* Insns which have delay slots that have not yet been filled. */
189 static struct obstack unfilled_slots_obstack;
190 static rtx *unfilled_firstobj;
192 /* Define macros to refer to the first and last slot containing unfilled
193 insns. These are used because the list may move and its address
194 should be recomputed at each use. */
196 #define unfilled_slots_base \
197 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
199 #define unfilled_slots_next \
200 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
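/* A hedged usage sketch (not a quote from the code below): insns are
   appended with obstack_ptr_grow (&unfilled_slots_obstack, insn), and the
   fill passes walk the region from unfilled_slots_base to
   unfilled_slots_next.  Recomputing both on every use keeps the pointers
   valid even if the obstack has been reallocated in the meantime.  */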
202 /* Points to the label before the end of the function, or before a
203 return insn. */
204 static rtx_code_label *function_return_label;
205 /* Likewise for a simple_return. */
206 static rtx_code_label *function_simple_return_label;
208 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
209 not always monotonically increase. */
210 static int *uid_to_ruid;
212 /* Highest valid index in `uid_to_ruid'. */
213 static int max_uid;
215 static int stop_search_p (rtx, int);
216 static int resource_conflicts_p (struct resources *, struct resources *);
217 static int insn_references_resource_p (rtx, struct resources *, bool);
218 static int insn_sets_resource_p (rtx, struct resources *, bool);
219 static rtx_code_label *find_end_label (rtx);
220 static rtx_insn *emit_delay_sequence (rtx_insn *, rtx_insn_list *, int);
221 static rtx_insn_list *add_to_delay_list (rtx_insn *, rtx_insn_list *);
222 static rtx_insn *delete_from_delay_slot (rtx_insn *);
223 static void delete_scheduled_jump (rtx_insn *);
224 static void note_delay_statistics (int, int);
225 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
226 static rtx_insn_list *optimize_skip (rtx_insn *);
227 #endif
228 static int get_jump_flags (const rtx_insn *, rtx);
229 static int mostly_true_jump (rtx);
230 static rtx get_branch_condition (const rtx_insn *, rtx);
231 static int condition_dominates_p (rtx, const rtx_insn *);
232 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
233 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx, rtx_insn_list *);
234 static int check_annul_list_true_false (int, rtx);
235 static rtx_insn_list *steal_delay_list_from_target (rtx_insn *, rtx,
236 rtx_sequence *,
237 rtx_insn_list *,
238 struct resources *,
239 struct resources *,
240 struct resources *,
241 int, int *, int *,
242 rtx *);
243 static rtx_insn_list *steal_delay_list_from_fallthrough (rtx_insn *, rtx,
244 rtx_sequence *,
245 rtx_insn_list *,
246 struct resources *,
247 struct resources *,
248 struct resources *,
249 int, int *, int *);
250 static void try_merge_delay_insns (rtx, rtx_insn *);
251 static rtx redundant_insn (rtx, rtx_insn *, rtx);
252 static int own_thread_p (rtx, rtx, int);
253 static void update_block (rtx_insn *, rtx);
254 static int reorg_redirect_jump (rtx_insn *, rtx);
255 static void update_reg_dead_notes (rtx, rtx);
256 static void fix_reg_dead_note (rtx, rtx);
257 static void update_reg_unused_notes (rtx, rtx);
258 static void fill_simple_delay_slots (int);
259 static rtx_insn_list *fill_slots_from_thread (rtx_insn *, rtx, rtx, rtx,
260 int, int, int, int,
261 int *, rtx_insn_list *);
262 static void fill_eager_delay_slots (void);
263 static void relax_delay_slots (rtx_insn *);
264 static void make_return_insns (rtx_insn *);
266 /* A wrapper around next_active_insn which takes care to return ret_rtx
267 unchanged. */
269 static rtx
270 first_active_target_insn (rtx insn)
272 if (ANY_RETURN_P (insn))
273 return insn;
274 return next_active_insn (as_a <rtx_insn *> (insn));
277 /* Return true iff INSN is a simplejump, or any kind of return insn. */
279 static bool
280 simplejump_or_return_p (rtx insn)
282 return (JUMP_P (insn)
283 && (simplejump_p (as_a <rtx_insn *> (insn))
284 || ANY_RETURN_P (PATTERN (insn))));
 287 /* Return TRUE if this insn should stop the search for insns to fill delay
288 slots. LABELS_P indicates that labels should terminate the search.
289 In all cases, jumps terminate the search. */
291 static int
292 stop_search_p (rtx insn, int labels_p)
294 if (insn == 0)
295 return 1;
297 /* If the insn can throw an exception that is caught within the function,
298 it may effectively perform a jump from the viewpoint of the function.
299 Therefore act like for a jump. */
300 if (can_throw_internal (insn))
301 return 1;
303 switch (GET_CODE (insn))
305 case NOTE:
306 case CALL_INSN:
307 return 0;
309 case CODE_LABEL:
310 return labels_p;
312 case JUMP_INSN:
313 case BARRIER:
314 return 1;
316 case INSN:
317 /* OK unless it contains a delay slot or is an `asm' insn of some type.
318 We don't know anything about these. */
319 return (GET_CODE (PATTERN (insn)) == SEQUENCE
320 || GET_CODE (PATTERN (insn)) == ASM_INPUT
321 || asm_noperands (PATTERN (insn)) >= 0);
323 default:
324 gcc_unreachable ();
328 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
329 resource set contains a volatile memory reference. Otherwise, return FALSE. */
331 static int
332 resource_conflicts_p (struct resources *res1, struct resources *res2)
334 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
335 || res1->volatil || res2->volatil)
336 return 1;
338 return hard_reg_set_intersect_p (res1->regs, res2->regs);
341 /* Return TRUE if any resource marked in RES, a `struct resources', is
 342    referenced by INSN.  If INCLUDE_DELAYED_EFFECTS is set, also take into
 343    account the resources used by a routine that INSN calls.
345 We compute this by computing all the resources referenced by INSN and
346 seeing if this conflicts with RES. It might be faster to directly check
347 ourselves, and this is the way it used to work, but it means duplicating
348 a large block of complex code. */
350 static int
351 insn_references_resource_p (rtx insn, struct resources *res,
352 bool include_delayed_effects)
354 struct resources insn_res;
356 CLEAR_RESOURCE (&insn_res);
357 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
358 return resource_conflicts_p (&insn_res, res);
361 /* Return TRUE if INSN modifies resources that are marked in RES.
362 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
363 included. CC0 is only modified if it is explicitly set; see comments
364 in front of mark_set_resources for details. */
366 static int
367 insn_sets_resource_p (rtx insn, struct resources *res,
368 bool include_delayed_effects)
370 struct resources insn_sets;
372 CLEAR_RESOURCE (&insn_sets);
373 mark_set_resources (insn, &insn_sets, 0,
374 (include_delayed_effects
375 ? MARK_SRC_DEST_CALL
376 : MARK_SRC_DEST));
377 return resource_conflicts_p (&insn_sets, res);
380 /* Find a label at the end of the function or before a RETURN. If there
381 is none, try to make one. If that fails, returns 0.
383 The property of such a label is that it is placed just before the
384 epilogue or a bare RETURN insn, so that another bare RETURN can be
385 turned into a jump to the label unconditionally. In particular, the
386 label cannot be placed before a RETURN insn with a filled delay slot.
388 ??? There may be a problem with the current implementation. Suppose
389 we start with a bare RETURN insn and call find_end_label. It may set
390 function_return_label just before the RETURN. Suppose the machinery
391 is able to fill the delay slot of the RETURN insn afterwards. Then
392 function_return_label is no longer valid according to the property
393 described above and find_end_label will still return it unmodified.
394 Note that this is probably mitigated by the following observation:
395 once function_return_label is made, it is very likely the target of
396 a jump, so filling the delay slot of the RETURN will be much more
397 difficult.
398 KIND is either simple_return_rtx or ret_rtx, indicating which type of
399 return we're looking for. */
401 static rtx_code_label *
402 find_end_label (rtx kind)
404 rtx_insn *insn;
405 rtx_code_label **plabel;
407 if (kind == ret_rtx)
408 plabel = &function_return_label;
409 else
411 gcc_assert (kind == simple_return_rtx);
412 plabel = &function_simple_return_label;
415 /* If we found one previously, return it. */
416 if (*plabel)
417 return *plabel;
419 /* Otherwise, see if there is a label at the end of the function. If there
420 is, it must be that RETURN insns aren't needed, so that is our return
421 label and we don't have to do anything else. */
423 insn = get_last_insn ();
424 while (NOTE_P (insn)
425 || (NONJUMP_INSN_P (insn)
426 && (GET_CODE (PATTERN (insn)) == USE
427 || GET_CODE (PATTERN (insn)) == CLOBBER)))
428 insn = PREV_INSN (insn);
430 /* When a target threads its epilogue we might already have a
431 suitable return insn. If so put a label before it for the
432 function_return_label. */
433 if (BARRIER_P (insn)
434 && JUMP_P (PREV_INSN (insn))
435 && PATTERN (PREV_INSN (insn)) == kind)
437 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
438 rtx_code_label *label = gen_label_rtx ();
439 LABEL_NUSES (label) = 0;
441 /* Put the label before any USE insns that may precede the RETURN
442 insn. */
443 while (GET_CODE (temp) == USE)
444 temp = PREV_INSN (temp);
446 emit_label_after (label, temp);
447 *plabel = label;
450 else if (LABEL_P (insn))
451 *plabel = as_a <rtx_code_label *> (insn);
452 else
454 rtx_code_label *label = gen_label_rtx ();
455 LABEL_NUSES (label) = 0;
456 /* If the basic block reorder pass moves the return insn to
457 some other place try to locate it again and put our
458 function_return_label there. */
459 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
460 insn = PREV_INSN (insn);
461 if (insn)
463 insn = PREV_INSN (insn);
465 /* Put the label before any USE insns that may precede the
466 RETURN insn. */
467 while (GET_CODE (insn) == USE)
468 insn = PREV_INSN (insn);
470 emit_label_after (label, insn);
472 else
474 #ifdef HAVE_epilogue
475 if (HAVE_epilogue
476 #ifdef HAVE_return
477 && ! HAVE_return
478 #endif
480 /* The RETURN insn has its delay slot filled so we cannot
481 emit the label just before it. Since we already have
482 an epilogue and cannot emit a new RETURN, we cannot
483 emit the label at all. */
484 return NULL;
485 #endif /* HAVE_epilogue */
487 /* Otherwise, make a new label and emit a RETURN and BARRIER,
488 if needed. */
489 emit_label (label);
490 #ifdef HAVE_return
491 if (HAVE_return)
493 /* The return we make may have delay slots too. */
494 rtx pat = gen_return ();
495 rtx_insn *insn = emit_jump_insn (pat);
496 set_return_jump_label (insn);
497 emit_barrier ();
498 if (num_delay_slots (insn) > 0)
499 obstack_ptr_grow (&unfilled_slots_obstack, insn);
501 #endif
503 *plabel = label;
506 /* Show one additional use for this label so it won't go away until
507 we are done. */
508 ++LABEL_NUSES (*plabel);
510 return *plabel;
513 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
514 the pattern of INSN with the SEQUENCE.
516 Returns the insn containing the SEQUENCE that replaces INSN. */
518 static rtx_insn *
519 emit_delay_sequence (rtx_insn *insn, rtx_insn_list *list, int length)
521 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
522 rtvec seqv = rtvec_alloc (length + 1);
523 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
524 rtx_insn *seq_insn = make_insn_raw (seq);
526 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
527 not have a location, but one of the delayed insns does, we pick up a
528 location from there later. */
529 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
531 /* Unlink INSN from the insn chain, so that we can put it into
532 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
533 rtx after = PREV_INSN (insn);
534 remove_insn (insn);
535 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
537 /* Build our SEQUENCE and rebuild the insn chain. */
538 int i = 1;
539 start_sequence ();
540 XVECEXP (seq, 0, 0) = emit_insn (insn);
541 for (rtx_insn_list *li = list; li; li = li->next (), i++)
543 rtx_insn *tem = li->insn ();
544 rtx note, next;
546 /* Show that this copy of the insn isn't deleted. */
547 tem->set_undeleted ();
549 /* Unlink insn from its original place, and re-emit it into
550 the sequence. */
551 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
552 XVECEXP (seq, 0, i) = emit_insn (tem);
 554       /* The SPARC assembler, for instance, emits a warning when debug info is
 555 	 output into the delay slot.  */
556 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
557 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
558 INSN_LOCATION (tem) = 0;
560 for (note = REG_NOTES (tem); note; note = next)
562 next = XEXP (note, 1);
563 switch (REG_NOTE_KIND (note))
565 case REG_DEAD:
566 /* Remove any REG_DEAD notes because we can't rely on them now
567 that the insn has been moved. */
568 remove_note (tem, note);
569 break;
571 case REG_LABEL_OPERAND:
572 case REG_LABEL_TARGET:
573 /* Keep the label reference count up to date. */
574 if (LABEL_P (XEXP (note, 0)))
575 LABEL_NUSES (XEXP (note, 0)) ++;
576 break;
578 default:
579 break;
583 end_sequence ();
584 gcc_assert (i == length + 1);
586 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
587 add_insn_after (seq_insn, after, NULL);
589 return seq_insn;
592 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
593 be in the order in which the insns are to be executed. */
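/* A hedged note on the list's shape (names below are illustrative): the
   chain is built by recursive append, so add_to_delay_list (i1, NULL)
   followed by add_to_delay_list (i2, list) keeps I1 at the head and puts
   I2 after it, matching the order in which the slots will execute.  */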
595 static rtx_insn_list *
596 add_to_delay_list (rtx_insn *insn, rtx_insn_list *delay_list)
598 /* If we have an empty list, just make a new list element. If
599 INSN has its block number recorded, clear it since we may
600 be moving the insn to a new block. */
602 if (delay_list == 0)
604 clear_hashed_info_for_insn (insn);
605 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
608 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
609 list. */
610 XEXP (delay_list, 1) = add_to_delay_list (insn, delay_list->next ());
612 return delay_list;
615 /* Delete INSN from the delay slot of the insn that it is in, which may
616 produce an insn with no delay slots. Return the new insn. */
618 static rtx_insn *
619 delete_from_delay_slot (rtx_insn *insn)
621 rtx_insn *trial, *seq_insn, *prev;
622 rtx_sequence *seq;
623 rtx_insn_list *delay_list = 0;
624 int i;
625 int had_barrier = 0;
627 /* We first must find the insn containing the SEQUENCE with INSN in its
628 delay slot. Do this by finding an insn, TRIAL, where
629 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
631 for (trial = insn;
632 PREV_INSN (NEXT_INSN (trial)) == trial;
633 trial = NEXT_INSN (trial))
636 seq_insn = PREV_INSN (NEXT_INSN (trial));
637 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
639 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
640 had_barrier = 1;
642 /* Create a delay list consisting of all the insns other than the one
643 we are deleting (unless we were the only one). */
644 if (seq->len () > 2)
645 for (i = 1; i < seq->len (); i++)
646 if (seq->insn (i) != insn)
647 delay_list = add_to_delay_list (seq->insn (i), delay_list);
649 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
650 list, and rebuild the delay list if non-empty. */
651 prev = PREV_INSN (seq_insn);
652 trial = seq->insn (0);
653 delete_related_insns (seq_insn);
654 add_insn_after (trial, prev, NULL);
 656   /* If there was a barrier after the old SEQUENCE, re-emit it.  */
657 if (had_barrier)
658 emit_barrier_after (trial);
 660   /* If there are any delay insns, re-emit them.  Otherwise clear the
661 annul flag. */
662 if (delay_list)
663 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
664 else if (JUMP_P (trial))
665 INSN_ANNULLED_BRANCH_P (trial) = 0;
667 INSN_FROM_TARGET_P (insn) = 0;
669 /* Show we need to fill this insn again. */
670 obstack_ptr_grow (&unfilled_slots_obstack, trial);
672 return trial;
675 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
676 the insn that sets CC0 for it and delete it too. */
678 static void
679 delete_scheduled_jump (rtx_insn *insn)
681 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
682 delete the insn that sets the condition code, but it is hard to find it.
683 Since this case is rare anyway, don't bother trying; there would likely
684 be other insns that became dead anyway, which we wouldn't know to
685 delete. */
687 #ifdef HAVE_cc0
688 if (reg_mentioned_p (cc0_rtx, insn))
690 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
692 /* If a reg-note was found, it points to an insn to set CC0. This
693 insn is in the delay list of some other insn. So delete it from
694 the delay list it was in. */
695 if (note)
697 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
698 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
699 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
701 else
703 /* The insn setting CC0 is our previous insn, but it may be in
704 a delay slot. It will be the last insn in the delay slot, if
705 it is. */
706 rtx_insn *trial = previous_insn (insn);
707 if (NOTE_P (trial))
708 trial = prev_nonnote_insn (trial);
709 if (sets_cc0_p (PATTERN (trial)) != 1
710 || FIND_REG_INC_NOTE (trial, NULL_RTX))
711 return;
712 if (PREV_INSN (NEXT_INSN (trial)) == trial)
713 delete_related_insns (trial);
714 else
715 delete_from_delay_slot (trial);
718 #endif
720 delete_related_insns (insn);
723 /* Counters for delay-slot filling. */
725 #define NUM_REORG_FUNCTIONS 2
726 #define MAX_DELAY_HISTOGRAM 3
727 #define MAX_REORG_PASSES 2
729 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
731 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
733 static int reorg_pass_number;
735 static void
736 note_delay_statistics (int slots_filled, int index)
738 num_insns_needing_delays[index][reorg_pass_number]++;
739 if (slots_filled > MAX_DELAY_HISTOGRAM)
740 slots_filled = MAX_DELAY_HISTOGRAM;
741 num_filled_delays[index][slots_filled][reorg_pass_number]++;
744 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
746 /* Optimize the following cases:
748 1. When a conditional branch skips over only one instruction,
749 use an annulling branch and put that insn in the delay slot.
 750      Use either a branch that annuls when the condition is true, or
751 invert the test with a branch that annuls when the condition is
752 false. This saves insns, since otherwise we must copy an insn
753 from the L1 target.
755 (orig) (skip) (otherwise)
756 Bcc.n L1 Bcc',a L1 Bcc,a L1'
757 insn insn insn2
758 L1: L1: L1:
759 insn2 insn2 insn2
760 insn3 insn3 L1':
761 insn3
763 2. When a conditional branch skips over only one instruction,
764 and after that, it unconditionally branches somewhere else,
 765      perform a similar optimization.  This saves executing the
766 second branch in the case where the inverted condition is true.
768 Bcc.n L1 Bcc',a L2
769 insn insn
770 L1: L1:
771 Bra L2 Bra L2
773 INSN is a JUMP_INSN.
775 This should be expanded to skip over N insns, where N is the number
776 of delay slots required. */
778 static rtx_insn_list *
779 optimize_skip (rtx_insn *insn)
781 rtx_insn *trial = next_nonnote_insn (insn);
782 rtx_insn *next_trial = next_active_insn (trial);
783 rtx_insn_list *delay_list = 0;
784 int flags;
786 flags = get_jump_flags (insn, JUMP_LABEL (insn));
788 if (trial == 0
789 || !NONJUMP_INSN_P (trial)
790 || GET_CODE (PATTERN (trial)) == SEQUENCE
791 || recog_memoized (trial) < 0
792 || (! eligible_for_annul_false (insn, 0, trial, flags)
793 && ! eligible_for_annul_true (insn, 0, trial, flags))
794 || can_throw_internal (trial))
795 return 0;
797 /* There are two cases where we are just executing one insn (we assume
798 here that a branch requires only one insn; this should be generalized
799 at some point): Where the branch goes around a single insn or where
800 we have one insn followed by a branch to the same label we branch to.
801 In both of these cases, inverting the jump and annulling the delay
802 slot give the same effect in fewer insns. */
803 if (next_trial == next_active_insn (JUMP_LABEL (insn))
804 || (next_trial != 0
805 && simplejump_or_return_p (next_trial)
806 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
808 if (eligible_for_annul_false (insn, 0, trial, flags))
810 if (invert_jump (insn, JUMP_LABEL (insn), 1))
811 INSN_FROM_TARGET_P (trial) = 1;
812 else if (! eligible_for_annul_true (insn, 0, trial, flags))
813 return 0;
816 delay_list = add_to_delay_list (trial, NULL);
817 next_trial = next_active_insn (trial);
818 update_block (trial, trial);
819 delete_related_insns (trial);
821 /* Also, if we are targeting an unconditional
822 branch, thread our jump to the target of that branch. Don't
823 change this into a RETURN here, because it may not accept what
824 we have in the delay slot. We'll fix this up later. */
825 if (next_trial && simplejump_or_return_p (next_trial))
827 rtx target_label = JUMP_LABEL (next_trial);
828 if (ANY_RETURN_P (target_label))
829 target_label = find_end_label (target_label);
831 if (target_label)
833 /* Recompute the flags based on TARGET_LABEL since threading
834 the jump to TARGET_LABEL may change the direction of the
835 jump (which may change the circumstances in which the
836 delay slot is nullified). */
837 flags = get_jump_flags (insn, target_label);
838 if (eligible_for_annul_true (insn, 0, trial, flags))
839 reorg_redirect_jump (insn, target_label);
843 INSN_ANNULLED_BRANCH_P (insn) = 1;
846 return delay_list;
848 #endif
850 /* Encode and return branch direction and prediction information for
851 INSN assuming it will jump to LABEL.
853 Non conditional branches return no direction information and
854 are predicted as very likely taken. */
856 static int
857 get_jump_flags (const rtx_insn *insn, rtx label)
859 int flags;
 861   /* get_jump_flags can be passed any insn with delay slots; these may
862 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
863 direction information, and only if they are conditional jumps.
865 If LABEL is a return, then there is no way to determine the branch
866 direction. */
867 if (JUMP_P (insn)
868 && (condjump_p (insn) || condjump_in_parallel_p (insn))
869 && !ANY_RETURN_P (label)
870 && INSN_UID (insn) <= max_uid
871 && INSN_UID (label) <= max_uid)
872 flags
873 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
874 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
875 /* No valid direction information. */
876 else
877 flags = 0;
879 return flags;
882 /* Return truth value of the statement that this branch
883 is mostly taken. If we think that the branch is extremely likely
884 to be taken, we return 2. If the branch is slightly more likely to be
885 taken, return 1. If the branch is slightly less likely to be taken,
886 return 0 and if the branch is highly unlikely to be taken, return -1. */
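/* A worked example (illustrative numbers, assuming the customary
   REG_BR_PROB_BASE of 10000): a REG_BR_PROB note of 9500 yields 2,
   6000 yields 1, 2000 yields 0, and 500 yields -1.  */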
888 static int
889 mostly_true_jump (rtx jump_insn)
891 /* If branch probabilities are available, then use that number since it
892 always gives a correct answer. */
893 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
894 if (note)
896 int prob = XINT (note, 0);
898 if (prob >= REG_BR_PROB_BASE * 9 / 10)
899 return 2;
900 else if (prob >= REG_BR_PROB_BASE / 2)
901 return 1;
902 else if (prob >= REG_BR_PROB_BASE / 10)
903 return 0;
904 else
905 return -1;
908 /* If there is no note, assume branches are not taken.
909 This should be rare. */
910 return 0;
913 /* Return the condition under which INSN will branch to TARGET. If TARGET
914 is zero, return the condition under which INSN will return. If INSN is
915 an unconditional branch, return const_true_rtx. If INSN isn't a simple
916 type of jump, or it doesn't go to TARGET, return 0. */
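/* For instance (an illustrative pattern, hedged): for a conditional jump
   whose pattern is (set (pc) (if_then_else (eq (reg) (const_int 0))
   (label_ref TARGET) (pc))), the (eq ...) rtx is returned; if the label
   sits in the else arm instead, the reversed comparison (ne ...) is
   returned.  */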
918 static rtx
919 get_branch_condition (const rtx_insn *insn, rtx target)
921 rtx pat = PATTERN (insn);
922 rtx src;
924 if (condjump_in_parallel_p (insn))
925 pat = XVECEXP (pat, 0, 0);
927 if (ANY_RETURN_P (pat) && pat == target)
928 return const_true_rtx;
930 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
931 return 0;
933 src = SET_SRC (pat);
934 if (GET_CODE (src) == LABEL_REF && LABEL_REF_LABEL (src) == target)
935 return const_true_rtx;
937 else if (GET_CODE (src) == IF_THEN_ELSE
938 && XEXP (src, 2) == pc_rtx
939 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
940 && LABEL_REF_LABEL (XEXP (src, 1)) == target)
941 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
942 return XEXP (src, 0);
944 else if (GET_CODE (src) == IF_THEN_ELSE
945 && XEXP (src, 1) == pc_rtx
946 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
947 && LABEL_REF_LABEL (XEXP (src, 2)) == target)
948 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
950 enum rtx_code rev;
951 rev = reversed_comparison_code (XEXP (src, 0), insn);
952 if (rev != UNKNOWN)
953 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
954 XEXP (XEXP (src, 0), 0),
955 XEXP (XEXP (src, 0), 1));
958 return 0;
961 /* Return nonzero if CONDITION is more strict than the condition of
962 INSN, i.e., if INSN will always branch if CONDITION is true. */
964 static int
965 condition_dominates_p (rtx condition, const rtx_insn *insn)
967 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
968 enum rtx_code code = GET_CODE (condition);
969 enum rtx_code other_code;
971 if (rtx_equal_p (condition, other_condition)
972 || other_condition == const_true_rtx)
973 return 1;
975 else if (condition == const_true_rtx || other_condition == 0)
976 return 0;
978 other_code = GET_CODE (other_condition);
979 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
980 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
981 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
982 return 0;
984 return comparison_dominates_p (code, other_code);
987 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
988 any insns already in the delay slot of JUMP. */
990 static int
991 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
993 int flags, i;
994 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
996 /* Make sure all the delay slots of this jump would still
997 be valid after threading the jump. If they are still
998 valid, then return nonzero. */
1000 flags = get_jump_flags (jump, newlabel);
1001 for (i = 1; i < pat->len (); i++)
1002 if (! (
1003 #ifdef ANNUL_IFFALSE_SLOTS
1004 (INSN_ANNULLED_BRANCH_P (jump)
1005 && INSN_FROM_TARGET_P (pat->insn (i)))
1006 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
1007 #endif
1008 #ifdef ANNUL_IFTRUE_SLOTS
1009 (INSN_ANNULLED_BRANCH_P (jump)
1010 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1011 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
1012 #endif
1013 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
1014 break;
1016 return (i == pat->len ());
1019 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
1020 any insns we wish to place in the delay slot of JUMP. */
1022 static int
1023 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
1024 rtx_insn_list *delay_list)
1026 int flags, i;
1027 rtx_insn_list *li;
1029 /* Make sure all the insns in DELAY_LIST would still be
1030 valid after threading the jump. If they are still
1031 valid, then return nonzero. */
1033 flags = get_jump_flags (jump, newlabel);
1034 for (li = delay_list, i = 0; li; li = li->next (), i++)
1035 if (! (
1036 #ifdef ANNUL_IFFALSE_SLOTS
1037 (INSN_ANNULLED_BRANCH_P (jump)
1038 && INSN_FROM_TARGET_P (li->insn ()))
1039 ? eligible_for_annul_false (jump, i, li->insn (), flags) :
1040 #endif
1041 #ifdef ANNUL_IFTRUE_SLOTS
1042 (INSN_ANNULLED_BRANCH_P (jump)
1043 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
1044 ? eligible_for_annul_true (jump, i, li->insn (), flags) :
1045 #endif
1046 eligible_for_delay (jump, i, li->insn (), flags)))
1047 break;
1049 return (li == NULL);
1052 /* DELAY_LIST is a list of insns that have already been placed into delay
1053 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1054 If not, return 0; otherwise return 1. */
1056 static int
1057 check_annul_list_true_false (int annul_true_p, rtx delay_list)
1059 rtx temp;
1061 if (delay_list)
1063 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1065 rtx trial = XEXP (temp, 0);
1067 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1068 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1069 return 0;
1073 return 1;
1076 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1077 the condition tested by INSN is CONDITION and the resources shown in
1078 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1079 from SEQ's delay list, in addition to whatever insns it may execute
 1080    (in DELAY_LIST).  SETS and NEEDED denote resources already set and
1081 needed while searching for delay slot insns. Return the concatenated
1082 delay list if possible, otherwise, return 0.
1084 SLOTS_TO_FILL is the total number of slots required by INSN, and
1085 PSLOTS_FILLED points to the number filled so far (also the number of
1086 insns in DELAY_LIST). It is updated with the number that have been
1087 filled from the SEQUENCE, if any.
1089 PANNUL_P points to a nonzero value if we already know that we need
1090 to annul INSN. If this routine determines that annulling is needed,
1091 it may set that value nonzero.
1093 PNEW_THREAD points to a location that is to receive the place at which
1094 execution should continue. */
1096 static rtx_insn_list *
1097 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1098 rtx_insn_list *delay_list, struct resources *sets,
1099 struct resources *needed,
1100 struct resources *other_needed,
1101 int slots_to_fill, int *pslots_filled,
1102 int *pannul_p, rtx *pnew_thread)
1104 int slots_remaining = slots_to_fill - *pslots_filled;
1105 int total_slots_filled = *pslots_filled;
1106 rtx_insn_list *new_delay_list = 0;
1107 int must_annul = *pannul_p;
1108 int used_annul = 0;
1109 int i;
1110 struct resources cc_set;
1111 bool *redundant;
1113 /* We can't do anything if there are more delay slots in SEQ than we
1114 can handle, or if we don't know that it will be a taken branch.
1115 We know that it will be a taken branch if it is either an unconditional
1116 branch or a conditional branch with a stricter branch condition.
1118 Also, exit if the branch has more than one set, since then it is computing
1119 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1120 ??? It may be possible to move other sets into INSN in addition to
1121 moving the instructions in the delay slots.
 1123      We cannot steal the delay list if one of the instructions in the
 1124      current delay_list modifies the condition codes and the jump in the
 1125      sequence is a conditional jump.  In that case we cannot change the
 1126      direction of the jump, because the condition codes set by the delay
 1127      list would affect the direction of the jump in the sequence.  */
1129 CLEAR_RESOURCE (&cc_set);
1130 for (rtx_insn_list *temp = delay_list; temp; temp = temp->next ())
1132 rtx_insn *trial = temp->insn ();
1134 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1135 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1136 return delay_list;
1139 if (XVECLEN (seq, 0) - 1 > slots_remaining
1140 || ! condition_dominates_p (condition, seq->insn (0))
1141 || ! single_set (seq->insn (0)))
1142 return delay_list;
1144 #ifdef MD_CAN_REDIRECT_BRANCH
1145 /* On some targets, branches with delay slots can have a limited
1146 displacement. Give the back end a chance to tell us we can't do
1147 this. */
1148 if (! MD_CAN_REDIRECT_BRANCH (insn, seq->insn (0)))
1149 return delay_list;
1150 #endif
1152 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1153 for (i = 1; i < seq->len (); i++)
1155 rtx_insn *trial = seq->insn (i);
1156 int flags;
1158 if (insn_references_resource_p (trial, sets, false)
1159 || insn_sets_resource_p (trial, needed, false)
1160 || insn_sets_resource_p (trial, sets, false)
1161 #ifdef HAVE_cc0
1162 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1163 delay list. */
1164 || find_reg_note (trial, REG_CC_USER, NULL_RTX)
1165 #endif
1166 /* If TRIAL is from the fallthrough code of an annulled branch insn
1167 in SEQ, we cannot use it. */
1168 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1169 && ! INSN_FROM_TARGET_P (trial)))
1170 return delay_list;
1172 /* If this insn was already done (usually in a previous delay slot),
1173 pretend we put it in our delay slot. */
1174 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1175 if (redundant[i])
1176 continue;
1178 /* We will end up re-vectoring this branch, so compute flags
1179 based on jumping to the new label. */
1180 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1182 if (! must_annul
1183 && ((condition == const_true_rtx
1184 || (! insn_sets_resource_p (trial, other_needed, false)
1185 && ! may_trap_or_fault_p (PATTERN (trial)))))
1186 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1187 : (must_annul || (delay_list == NULL && new_delay_list == NULL))
1188 && (must_annul = 1,
1189 check_annul_list_true_false (0, delay_list)
1190 && check_annul_list_true_false (0, new_delay_list)
1191 && eligible_for_annul_false (insn, total_slots_filled,
1192 trial, flags)))
1194 if (must_annul)
1195 used_annul = 1;
1196 rtx_insn *temp = copy_delay_slot_insn (trial);
1197 INSN_FROM_TARGET_P (temp) = 1;
1198 new_delay_list = add_to_delay_list (temp, new_delay_list);
1199 total_slots_filled++;
1201 if (--slots_remaining == 0)
1202 break;
1204 else
1205 return delay_list;
1208 /* Record the effect of the instructions that were redundant and which
1209 we therefore decided not to copy. */
1210 for (i = 1; i < seq->len (); i++)
1211 if (redundant[i])
1212 update_block (seq->insn (i), insn);
1214 /* Show the place to which we will be branching. */
1215 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1217 /* Add any new insns to the delay list and update the count of the
1218 number of slots filled. */
1219 *pslots_filled = total_slots_filled;
1220 if (used_annul)
1221 *pannul_p = 1;
1223 if (delay_list == 0)
1224 return new_delay_list;
1226 for (rtx_insn_list *temp = new_delay_list; temp; temp = temp->next ())
1227 delay_list = add_to_delay_list (temp->insn (), delay_list);
1229 return delay_list;
1232 /* Similar to steal_delay_list_from_target except that SEQ is on the
1233 fallthrough path of INSN. Here we only do something if the delay insn
1234 of SEQ is an unconditional branch. In that case we steal its delay slot
1235 for INSN since unconditional branches are much easier to fill. */
1237 static rtx_insn_list *
1238 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1239 rtx_sequence *seq,
1240 rtx_insn_list *delay_list,
1241 struct resources *sets,
1242 struct resources *needed,
1243 struct resources *other_needed,
1244 int slots_to_fill, int *pslots_filled,
1245 int *pannul_p)
1247 int i;
1248 int flags;
1249 int must_annul = *pannul_p;
1250 int used_annul = 0;
1252 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1254 /* We can't do anything if SEQ's delay insn isn't an
1255 unconditional branch. */
1257 if (! simplejump_or_return_p (seq->insn (0)))
1258 return delay_list;
1260 for (i = 1; i < seq->len (); i++)
1262 rtx_insn *trial = seq->insn (i);
1264 /* If TRIAL sets CC0, stealing it will move it too far from the use
1265 of CC0. */
1266 if (insn_references_resource_p (trial, sets, false)
1267 || insn_sets_resource_p (trial, needed, false)
1268 || insn_sets_resource_p (trial, sets, false)
1269 #ifdef HAVE_cc0
1270 || sets_cc0_p (PATTERN (trial))
1271 #endif
1274 break;
1276 /* If this insn was already done, we don't need it. */
1277 if (redundant_insn (trial, insn, delay_list))
1279 update_block (trial, insn);
1280 delete_from_delay_slot (trial);
1281 continue;
1284 if (! must_annul
1285 && ((condition == const_true_rtx
1286 || (! insn_sets_resource_p (trial, other_needed, false)
1287 && ! may_trap_or_fault_p (PATTERN (trial)))))
1288 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1289 : (must_annul || delay_list == NULL) && (must_annul = 1,
1290 check_annul_list_true_false (1, delay_list)
1291 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1293 if (must_annul)
1294 used_annul = 1;
1295 delete_from_delay_slot (trial);
1296 delay_list = add_to_delay_list (trial, delay_list);
1298 if (++(*pslots_filled) == slots_to_fill)
1299 break;
1301 else
1302 break;
1305 if (used_annul)
1306 *pannul_p = 1;
1307 return delay_list;
1310 /* Try merging insns starting at THREAD which match exactly the insns in
1311 INSN's delay list.
1313 If all insns were matched and the insn was previously annulling, the
1314 annul bit will be cleared.
1316 For each insn that is merged, if the branch is or will be non-annulling,
1317 we delete the merged insn. */
1319 static void
1320 try_merge_delay_insns (rtx insn, rtx_insn *thread)
1322 rtx_insn *trial, *next_trial;
1323 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1324 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1325 int slot_number = 1;
1326 int num_slots = XVECLEN (PATTERN (insn), 0);
1327 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1328 struct resources set, needed;
1329 rtx_insn_list *merged_insns = 0;
1330 int i;
1331 int flags;
1333 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1335 CLEAR_RESOURCE (&needed);
1336 CLEAR_RESOURCE (&set);
1338 /* If this is not an annulling branch, take into account anything needed in
1339 INSN's delay slot. This prevents two increments from being incorrectly
1340 folded into one. If we are annulling, this would be the correct
1341 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1342 will essentially disable this optimization. This method is somewhat of
1343 a kludge, but I don't see a better way.) */
1344 if (! annul_p)
1345 for (i = 1 ; i < num_slots; i++)
1346 if (XVECEXP (PATTERN (insn), 0, i))
1347 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1348 true);
1350 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1352 rtx pat = PATTERN (trial);
1353 rtx oldtrial = trial;
1355 next_trial = next_nonnote_insn (trial);
1357 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1358 if (NONJUMP_INSN_P (trial)
1359 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1360 continue;
1362 if (GET_CODE (next_to_match) == GET_CODE (trial)
1363 #ifdef HAVE_cc0
1364 /* We can't share an insn that sets cc0. */
1365 && ! sets_cc0_p (pat)
1366 #endif
1367 && ! insn_references_resource_p (trial, &set, true)
1368 && ! insn_sets_resource_p (trial, &set, true)
1369 && ! insn_sets_resource_p (trial, &needed, true)
1370 && (trial = try_split (pat, trial, 0)) != 0
1371 /* Update next_trial, in case try_split succeeded. */
1372 && (next_trial = next_nonnote_insn (trial))
1373 /* Likewise THREAD. */
1374 && (thread = oldtrial == thread ? trial : thread)
1375 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
 1376 	  /* We have to test this condition if the annul condition is different
 1377 	     from (and less restrictive than) the non-annulling one.  */
1378 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1381 if (! annul_p)
1383 update_block (trial, thread);
1384 if (trial == thread)
1385 thread = next_active_insn (thread);
1387 delete_related_insns (trial);
1388 INSN_FROM_TARGET_P (next_to_match) = 0;
1390 else
1391 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1393 if (++slot_number == num_slots)
1394 break;
1396 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1399 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1400 mark_referenced_resources (trial, &needed, true);
1403 /* See if we stopped on a filled insn. If we did, try to see if its
1404 delay slots match. */
1405 if (slot_number != num_slots
1406 && trial && NONJUMP_INSN_P (trial)
1407 && GET_CODE (PATTERN (trial)) == SEQUENCE
1408 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1409 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1411 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1412 rtx filled_insn = XVECEXP (pat, 0, 0);
1414 /* Account for resources set/needed by the filled insn. */
1415 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1416 mark_referenced_resources (filled_insn, &needed, true);
1418 for (i = 1; i < pat->len (); i++)
1420 rtx_insn *dtrial = pat->insn (i);
1422 if (! insn_references_resource_p (dtrial, &set, true)
1423 && ! insn_sets_resource_p (dtrial, &set, true)
1424 && ! insn_sets_resource_p (dtrial, &needed, true)
1425 #ifdef HAVE_cc0
1426 && ! sets_cc0_p (PATTERN (dtrial))
1427 #endif
1428 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1429 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1431 if (! annul_p)
1433 rtx_insn *new_rtx;
1435 update_block (dtrial, thread);
1436 new_rtx = delete_from_delay_slot (dtrial);
1437 if (thread->deleted ())
1438 thread = new_rtx;
1439 INSN_FROM_TARGET_P (next_to_match) = 0;
1441 else
1442 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1443 merged_insns);
1445 if (++slot_number == num_slots)
1446 break;
1448 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1450 else
1452 /* Keep track of the set/referenced resources for the delay
1453 slots of any trial insns we encounter. */
1454 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1455 mark_referenced_resources (dtrial, &needed, true);
1460 /* If all insns in the delay slot have been matched and we were previously
 1461      annulling the branch, we no longer need to.  In that case delete all the
1462 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1463 the delay list so that we know that it isn't only being used at the
1464 target. */
1465 if (slot_number == num_slots && annul_p)
1467 for (; merged_insns; merged_insns = merged_insns->next ())
1469 if (GET_MODE (merged_insns) == SImode)
1471 rtx_insn *new_rtx;
1473 update_block (merged_insns->insn (), thread);
1474 new_rtx = delete_from_delay_slot (merged_insns->insn ());
1475 if (thread->deleted ())
1476 thread = new_rtx;
1478 else
1480 update_block (merged_insns->insn (), thread);
1481 delete_related_insns (merged_insns->insn ());
1485 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1487 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1488 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1492 /* See if INSN is redundant with an insn in front of TARGET. Often this
1493 is called when INSN is a candidate for a delay slot of TARGET.
1494 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1495 of INSN. Often INSN will be redundant with an insn in a delay slot of
1496 some previous insn. This happens when we have a series of branches to the
1497 same label; in that case the first insn at the target might want to go
1498 into each of the delay slots.
1500 If we are not careful, this routine can take up a significant fraction
1501 of the total compilation time (4%), but only wins rarely. Hence we
1502 speed this routine up by making two passes. The first pass goes back
1503 until it hits a label and sees if it finds an insn with an identical
1504 pattern. Only in this (relatively rare) event does it check for
1505 data conflicts.
1507 We do not split insns we encounter. This could cause us not to find a
1508 redundant insn, but the cost of splitting seems greater than the possible
1509 gain in rare cases. */
1511 static rtx
1512 redundant_insn (rtx insn, rtx_insn *target, rtx delay_list)
1514 rtx target_main = target;
1515 rtx ipat = PATTERN (insn);
1516 rtx_insn *trial;
1517 rtx pat;
1518 struct resources needed, set;
1519 int i;
1520 unsigned insns_to_search;
1522 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1523 are allowed to not actually assign to such a register. */
1524 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1525 return 0;
1527 /* Scan backwards looking for a match. */
1528 for (trial = PREV_INSN (target),
1529 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1530 trial && insns_to_search > 0;
1531 trial = PREV_INSN (trial))
1533 /* (use (insn))s can come immediately after a barrier if the
1534 label that used to precede them has been deleted as dead.
1535 See delete_related_insns. */
1536 if (LABEL_P (trial) || BARRIER_P (trial))
1537 return 0;
1539 if (!INSN_P (trial))
1540 continue;
1541 --insns_to_search;
1543 pat = PATTERN (trial);
1544 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1545 continue;
1547 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1549 /* Stop for a CALL and its delay slots because it is difficult to
1550 track its resource needs correctly. */
1551 if (CALL_P (seq->element (0)))
1552 return 0;
1554 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1555 slots because it is difficult to track its resource needs
1556 correctly. */
1558 #ifdef INSN_SETS_ARE_DELAYED
1559 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1560 return 0;
1561 #endif
1563 #ifdef INSN_REFERENCES_ARE_DELAYED
1564 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1565 return 0;
1566 #endif
1568 /* See if any of the insns in the delay slot match, updating
1569 resource requirements as we go. */
1570 for (i = seq->len () - 1; i > 0; i--)
1571 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1572 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1573 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1574 break;
1576 /* If found a match, exit this loop early. */
1577 if (i > 0)
1578 break;
1581 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1582 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1583 break;
1586 /* If we didn't find an insn that matches, return 0. */
1587 if (trial == 0)
1588 return 0;
1590 /* See what resources this insn sets and needs. If they overlap, or
1591 if this insn references CC0, it can't be redundant. */
1593 CLEAR_RESOURCE (&needed);
1594 CLEAR_RESOURCE (&set);
1595 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1596 mark_referenced_resources (insn, &needed, true);
1598 /* If TARGET is a SEQUENCE, get the main insn. */
1599 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1600 target_main = XVECEXP (PATTERN (target), 0, 0);
1602 if (resource_conflicts_p (&needed, &set)
1603 #ifdef HAVE_cc0
1604 || reg_mentioned_p (cc0_rtx, ipat)
1605 #endif
1606 /* The insn requiring the delay may not set anything needed or set by
1607 INSN. */
1608 || insn_sets_resource_p (target_main, &needed, true)
1609 || insn_sets_resource_p (target_main, &set, true))
1610 return 0;
1612 /* Insns we pass may not set either NEEDED or SET, so merge them for
1613 simpler tests. */
1614 needed.memory |= set.memory;
1615 IOR_HARD_REG_SET (needed.regs, set.regs);
1617 /* This insn isn't redundant if it conflicts with an insn that either is
1618 or will be in a delay slot of TARGET. */
1620 while (delay_list)
1622 if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, true))
1623 return 0;
1624 delay_list = XEXP (delay_list, 1);
1627 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1628 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1629 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1630 true))
1631 return 0;
1633 /* Scan backwards until we reach a label or an insn that uses something
1634 INSN sets or sets something insn uses or sets. */
1636 for (trial = PREV_INSN (target),
1637 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1638 trial && !LABEL_P (trial) && insns_to_search > 0;
1639 trial = PREV_INSN (trial))
1641 if (!INSN_P (trial))
1642 continue;
1643 --insns_to_search;
1645 pat = PATTERN (trial);
1646 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1647 continue;
1649 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1651 bool annul_p = false;
1652 rtx_insn *control = seq->insn (0);
1654 /* If this is a CALL_INSN and its delay slots, it is hard to track
1655 the resource needs properly, so give up. */
1656 if (CALL_P (control))
1657 return 0;
1659 /* If this is an INSN or JUMP_INSN with delayed effects, it
1660 is hard to track the resource needs properly, so give up. */
1662 #ifdef INSN_SETS_ARE_DELAYED
1663 if (INSN_SETS_ARE_DELAYED (control))
1664 return 0;
1665 #endif
1667 #ifdef INSN_REFERENCES_ARE_DELAYED
1668 if (INSN_REFERENCES_ARE_DELAYED (control))
1669 return 0;
1670 #endif
1672 if (JUMP_P (control))
1673 annul_p = INSN_ANNULLED_BRANCH_P (control);
1675 /* See if any of the insns in the delay slot match, updating
1676 resource requirements as we go. */
1677 for (i = seq->len () - 1; i > 0; i--)
1679 rtx candidate = seq->element (i);
1681 /* If an insn will be annulled if the branch is false, it isn't
1682 considered as a possible duplicate insn. */
1683 if (rtx_equal_p (PATTERN (candidate), ipat)
1684 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1686 /* Show that this insn will be used in the sequel. */
1687 INSN_FROM_TARGET_P (candidate) = 0;
1688 return candidate;
1691 /* Unless this is an annulled insn from the target of a branch,
1692 we must stop if it sets anything needed or set by INSN. */
1693 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1694 && insn_sets_resource_p (candidate, &needed, true))
1695 return 0;
1698 /* If the insn requiring the delay slot conflicts with INSN, we
1699 must stop. */
1700 if (insn_sets_resource_p (control, &needed, true))
1701 return 0;
1703 else
1705 /* See if TRIAL is the same as INSN. */
1706 pat = PATTERN (trial);
1707 if (rtx_equal_p (pat, ipat))
1708 return trial;
1710 /* Can't go any further if TRIAL conflicts with INSN. */
1711 if (insn_sets_resource_p (trial, &needed, true))
1712 return 0;
1716 return 0;
1719 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1720 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1721 is nonzero, we are allowed to fall into this thread; otherwise, we are
1722 not.
1724 If LABEL is used more than once, or we pass a label other than LABEL before
1725 finding an active insn, we do not own this thread. */
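/* Roughly, we "own" a thread when the only way control can reach it is
   through the branch being processed: its label (if any) is used by
   exactly one jump, and nothing falls into it unless ALLOW_FALLTHROUGH
   permits.  Only then may its insns be deleted or rearranged.  */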
1727 static int
1728 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1730 rtx_insn *active_insn;
1731 rtx_insn *insn;
1733 /* We don't own the function end. */
1734 if (thread == 0 || ANY_RETURN_P (thread))
1735 return 0;
1737 /* We have a non-NULL insn. */
1738 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1740 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1741 active_insn = next_active_insn (PREV_INSN (thread_insn));
1743 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1744 if (LABEL_P (insn)
1745 && (insn != label || LABEL_NUSES (insn) != 1))
1746 return 0;
1748 if (allow_fallthrough)
1749 return 1;
1751 /* Ensure that we reach a BARRIER before any insn or label. */
1752 for (insn = prev_nonnote_insn (thread_insn);
1753 insn == 0 || !BARRIER_P (insn);
1754 insn = prev_nonnote_insn (insn))
1755 if (insn == 0
1756 || LABEL_P (insn)
1757 || (NONJUMP_INSN_P (insn)
1758 && GET_CODE (PATTERN (insn)) != USE
1759 && GET_CODE (PATTERN (insn)) != CLOBBER))
1760 return 0;
1762 return 1;
1765 /* Called when INSN is being moved from a location near the target of a jump.
1766 We leave a marker of the form (use (INSN)) immediately in front
1767 of WHERE for mark_target_live_regs. These markers will be deleted when
1768 reorg finishes.
1770 We used to try to update the live status of registers if WHERE is at
1771 the start of a basic block, but that can't work since we may remove a
1772 BARRIER in relax_delay_slots. */
1774 static void
1775 update_block (rtx_insn *insn, rtx where)
1777 /* Ignore if this was in a delay slot and it came from the target of
1778 a branch. */
1779 if (INSN_FROM_TARGET_P (insn))
1780 return;
1782 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1784 /* INSN might be making a value live in a block where it didn't use to
1785 be. So recompute liveness information for this block. */
1787 incr_ticks_for_insn (insn);
1790 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1791 the basic block containing the jump. */
1793 static int
1794 reorg_redirect_jump (rtx_insn *jump, rtx nlabel)
1796 incr_ticks_for_insn (jump);
1797 return redirect_jump (jump, nlabel, 1);
1800 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1801 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1802 that reference values used in INSN. If we find one, then we move the
1803 REG_DEAD note to INSN.
1805 This is needed to handle the case where a later insn (after INSN) has a
1806 REG_DEAD note for a register used by INSN, and this later insn subsequently
1807 gets moved before a CODE_LABEL because it is a redundant insn. In this
1808 case, mark_target_live_regs may be confused into thinking the register
1809 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1811 static void
1812 update_reg_dead_notes (rtx insn, rtx delayed_insn)
1814 rtx p, link, next;
1816 for (p = next_nonnote_insn (insn); p != delayed_insn;
1817 p = next_nonnote_insn (p))
1818 for (link = REG_NOTES (p); link; link = next)
1820 next = XEXP (link, 1);
1822 if (REG_NOTE_KIND (link) != REG_DEAD
1823 || !REG_P (XEXP (link, 0)))
1824 continue;
1826 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1828 /* Move the REG_DEAD note from P to INSN. */
1829 remove_note (p, link);
1830 XEXP (link, 1) = REG_NOTES (insn);
1831 REG_NOTES (insn) = link;
1836 /* Called when an insn redundant with start_insn is deleted. If there
1837 is a REG_DEAD note for the target of start_insn between start_insn
1838 and stop_insn, then the REG_DEAD note needs to be deleted since the
1839 value no longer dies there.
1841 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1842 confused into thinking the register is dead. */
1844 static void
1845 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1847 rtx p, link, next;
1849 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1850 p = next_nonnote_insn (p))
1851 for (link = REG_NOTES (p); link; link = next)
1853 next = XEXP (link, 1);
1855 if (REG_NOTE_KIND (link) != REG_DEAD
1856 || !REG_P (XEXP (link, 0)))
1857 continue;
1859 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1861 remove_note (p, link);
1862 return;
1867 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1869 This handles the case of udivmodXi4 instructions which optimize their
1870 output depending on whether any REG_UNUSED notes are present.
1871 We must make sure that INSN calculates as many results as REDUNDANT_INSN
1872 does. */
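/* For example, if INSN computed both a quotient and a remainder but
   carried a REG_UNUSED note for the remainder, while REDUNDANT_INSN
   (about to be treated as a duplicate and removed) has no such note
   because its remainder really is used, the note must be dropped from
   INSN so that the remainder keeps being computed.  */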
1874 static void
1875 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1877 rtx link, next;
1879 for (link = REG_NOTES (insn); link; link = next)
1881 next = XEXP (link, 1);
1883 if (REG_NOTE_KIND (link) != REG_UNUSED
1884 || !REG_P (XEXP (link, 0)))
1885 continue;
1887 if (! find_regno_note (redundant_insn, REG_UNUSED,
1888 REGNO (XEXP (link, 0))))
1889 remove_note (insn, link);
1893 static vec <rtx> sibling_labels;
1895 /* Return the label before INSN, or put a new label there. If SIBLING is
1896 nonzero, it is another label associated with the new label (if any),
1897 typically the former target of the jump that will be redirected to
1898 the new label. */
1900 static rtx_insn *
1901 get_label_before (rtx_insn *insn, rtx sibling)
1903 rtx_insn *label;
1905 /* Find an existing label at this point
1906 or make a new one if there is none. */
1907 label = prev_nonnote_insn (insn);
1909 if (label == 0 || !LABEL_P (label))
1911 rtx_insn *prev = PREV_INSN (insn);
1913 label = gen_label_rtx ();
1914 emit_label_after (label, prev);
1915 LABEL_NUSES (label) = 0;
1916 if (sibling)
1918 sibling_labels.safe_push (label);
1919 sibling_labels.safe_push (sibling);
1922 return label;
1925 /* Scan a function looking for insns that need a delay slot and find insns to
1926 put into the delay slot.
1928 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1929 as calls). We do these first since we don't want jump insns (which are
1930 easier to fill) to take the only insns that could be used for non-jump insns.
1931 When it is zero, only try to fill JUMP_INSNs.
1933 When slots are filled in this manner, the insns (including the
1934 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1935 it is possible to tell whether a delay slot has really been filled
1936 or not. `final' knows how to deal with this, by communicating
1937 through FINAL_SEQUENCE. */
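/* For instance, once its single delay slot is filled, a call ends up
   wrapped roughly as

	(insn (sequence [(call_insn ...) (insn ...)]))

   where element 0 of the SEQUENCE is the insn that needed the slot and
   the remaining elements are the insns chosen to fill it.  */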
1939 static void
1940 fill_simple_delay_slots (int non_jumps_p)
1942 rtx_insn *insn, *trial, *next_trial;
1943 rtx pat;
1944 int i;
1945 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1946 struct resources needed, set;
1947 int slots_to_fill, slots_filled;
1948 rtx_insn_list *delay_list;
1950 for (i = 0; i < num_unfilled_slots; i++)
1952 int flags;
1953 /* Get the next insn to fill. If it has already had any slots assigned,
1954 we can't do anything with it. Maybe we'll improve this later. */
1956 insn = unfilled_slots_base[i];
1957 if (insn == 0
1958 || insn->deleted ()
1959 || (NONJUMP_INSN_P (insn)
1960 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1961 || (JUMP_P (insn) && non_jumps_p)
1962 || (!JUMP_P (insn) && ! non_jumps_p))
1963 continue;
1965 /* It may have been that this insn used to need delay slots, but
1966 now doesn't; ignore in that case. This can happen, for example,
1967 on the HP PA RISC, where the number of delay slots depends on
1968 what insns are nearby. */
1969 slots_to_fill = num_delay_slots (insn);
1971 /* Some machine descriptions have defined instructions to have
1972 delay slots only in certain circumstances which may depend on
1973 nearby insns (which change due to reorg's actions).
1975 For example, the PA port normally has delay slots for unconditional
1976 jumps.
1978 However, the PA port claims such jumps do not have a delay slot
1979 if they are immediate successors of certain CALL_INSNs. This
1980 allows the port to favor filling the delay slot of the call with
1981 the unconditional jump. */
1982 if (slots_to_fill == 0)
1983 continue;
1985 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1986 says how many. After initialization, first try optimizing
1988 call _foo call _foo
1989 nop add %o7,.-L1,%o7
1990 b,a L1
1993 If this case applies, the delay slot of the call is filled with
1994 the unconditional jump. This is done first to avoid having the
1995 delay slot of the call filled in the backward scan. Also, since
1996 the unconditional jump is likely to also have a delay slot, that
1997 insn must exist when it is subsequently scanned.
1999 This is tried on each insn with delay slots as some machines
2000 have insns which perform calls, but are not represented as
2001 CALL_INSNs. */
2003 slots_filled = 0;
2004 delay_list = 0;
2006 if (JUMP_P (insn))
2007 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2008 else
2009 flags = get_jump_flags (insn, NULL_RTX);
2011 if ((trial = next_active_insn (insn))
2012 && JUMP_P (trial)
2013 && simplejump_p (trial)
2014 && eligible_for_delay (insn, slots_filled, trial, flags)
2015 && no_labels_between_p (insn, trial)
2016 && ! can_throw_internal (trial))
2018 rtx_insn **tmp;
2019 slots_filled++;
2020 delay_list = add_to_delay_list (trial, delay_list);
2022 /* TRIAL may have had its delay slot filled, then unfilled. When
2023 the delay slot is unfilled, TRIAL is placed back on the unfilled
2024 slots obstack. Unfortunately, it is placed on the end of the
2025 obstack, not in its original location. Therefore, we must search
2026 from entry i + 1 to the end of the unfilled slots obstack to
2027 try and find TRIAL. */
2028 tmp = &unfilled_slots_base[i + 1];
2029 while (*tmp != trial && tmp != unfilled_slots_next)
2030 tmp++;
2032 /* Remove the unconditional jump from consideration for delay slot
2033 filling and unthread it. */
2034 if (*tmp == trial)
2035 *tmp = 0;
2037 rtx_insn *next = NEXT_INSN (trial);
2038 rtx_insn *prev = PREV_INSN (trial);
2039 if (prev)
2040 SET_NEXT_INSN (prev) = next;
2041 if (next)
2042 SET_PREV_INSN (next) = prev;
2046 /* Now, scan backwards from the insn to search for a potential
2047 delay-slot candidate. Stop searching when a label or jump is hit.
2049 For each candidate, if it is to go into the delay slot (moved
2050 forward in execution sequence), it must not need or set any resources
2051 that were set by later insns and must not set any resources that
2052 are needed for those insns.
2054 The delay slot insn itself sets resources unless it is a call
2055 (in which case the called routine, not the insn itself, is doing
2056 the setting). */
2058 if (slots_filled < slots_to_fill)
2060 CLEAR_RESOURCE (&needed);
2061 CLEAR_RESOURCE (&set);
2062 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2063 mark_referenced_resources (insn, &needed, false);
2065 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2066 trial = next_trial)
2068 next_trial = prev_nonnote_insn (trial);
2070 /* This must be an INSN or CALL_INSN. */
2071 pat = PATTERN (trial);
2073 /* Stand-alone USE and CLOBBER are just for flow. */
2074 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2075 continue;
2077 /* Check for resource conflict first, to avoid unnecessary
2078 splitting. */
2079 if (! insn_references_resource_p (trial, &set, true)
2080 && ! insn_sets_resource_p (trial, &set, true)
2081 && ! insn_sets_resource_p (trial, &needed, true)
2082 #ifdef HAVE_cc0
2083 /* Can't separate set of cc0 from its use. */
2084 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2085 #endif
2086 && ! can_throw_internal (trial))
2088 trial = try_split (pat, trial, 1);
2089 next_trial = prev_nonnote_insn (trial);
2090 if (eligible_for_delay (insn, slots_filled, trial, flags))
2092 /* In this case, we are searching backward, so if we
2093 find insns to put on the delay list, we want
2094 to put them at the head, rather than the
2095 tail, of the list. */
2097 update_reg_dead_notes (trial, insn);
2098 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2099 trial, delay_list);
2100 update_block (trial, trial);
2101 delete_related_insns (trial);
2102 if (slots_to_fill == ++slots_filled)
2103 break;
2104 continue;
2108 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2109 mark_referenced_resources (trial, &needed, true);
2113 /* If all needed slots haven't been filled, we come here. */
2115 /* Try to optimize the case of jumping around a single insn. */
2116 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2117 if (slots_filled != slots_to_fill
2118 && delay_list == 0
2119 && JUMP_P (insn)
2120 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2121 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2123 delay_list = optimize_skip (insn);
2124 if (delay_list)
2125 slots_filled += 1;
2127 #endif
2129 /* Try to get insns from beyond the insn needing the delay slot.
2130 These insns can neither set nor reference resources set in insns being
2131 skipped, cannot set resources in the insn being skipped, and, if this
2132 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2133 call might not return).
2135 There used to be code which continued past the target label if
2136 we saw all uses of the target label. This code did not work,
2137 because it failed to account for some instructions which were
2138 both annulled and marked as from the target. This can happen as a
2139 result of optimize_skip. Since this code was redundant with
2140 fill_eager_delay_slots anyway, it was just deleted. */
2142 if (slots_filled != slots_to_fill
2143 /* If this instruction could throw an exception which is
2144 caught in the same function, then it's not safe to fill
2145 the delay slot with an instruction from beyond this
2146 point. For example, consider:
2148 int i = 2;
2150 try {
2151 f();
2152 i = 3;
2153 } catch (...) {}
2155 return i;
2157 Even though `i' is a local variable, we must be sure not
2158 to put `i = 3' in the delay slot if `f' might throw an
2159 exception.
2161 Presumably, we should also check to see if we could get
2162 back to this function via `setjmp'. */
2163 && ! can_throw_internal (insn)
2164 && !JUMP_P (insn))
2166 int maybe_never = 0;
2167 rtx pat, trial_delay;
2169 CLEAR_RESOURCE (&needed);
2170 CLEAR_RESOURCE (&set);
2171 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2172 mark_referenced_resources (insn, &needed, true);
2174 if (CALL_P (insn))
2175 maybe_never = 1;
2177 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2178 trial = next_trial)
2180 next_trial = next_nonnote_insn (trial);
2182 /* This must be an INSN or CALL_INSN. */
2183 pat = PATTERN (trial);
2185 /* Stand-alone USE and CLOBBER are just for flow. */
2186 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2187 continue;
2189 /* If this already has filled delay slots, get the insn needing
2190 the delay slots. */
2191 if (GET_CODE (pat) == SEQUENCE)
2192 trial_delay = XVECEXP (pat, 0, 0);
2193 else
2194 trial_delay = trial;
2196 /* Stop our search when seeing a jump. */
2197 if (JUMP_P (trial_delay))
2198 break;
2200 /* See if we have a resource problem before we try to split. */
2201 if (GET_CODE (pat) != SEQUENCE
2202 && ! insn_references_resource_p (trial, &set, true)
2203 && ! insn_sets_resource_p (trial, &set, true)
2204 && ! insn_sets_resource_p (trial, &needed, true)
2205 #ifdef HAVE_cc0
2206 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2207 #endif
2208 && ! (maybe_never && may_trap_or_fault_p (pat))
2209 && (trial = try_split (pat, trial, 0))
2210 && eligible_for_delay (insn, slots_filled, trial, flags)
2211 && ! can_throw_internal (trial))
2213 next_trial = next_nonnote_insn (trial);
2214 delay_list = add_to_delay_list (trial, delay_list);
2215 #ifdef HAVE_cc0
2216 if (reg_mentioned_p (cc0_rtx, pat))
2217 link_cc0_insns (trial);
2218 #endif
2219 delete_related_insns (trial);
2220 if (slots_to_fill == ++slots_filled)
2221 break;
2222 continue;
2225 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2226 mark_referenced_resources (trial, &needed, true);
2228 /* Ensure we don't put insns between the setting of cc and the
2229 comparison by moving a setting of cc into an earlier delay
2230 slot since these insns could clobber the condition code. */
2231 set.cc = 1;
2233 /* If this is a call, we might not get here. */
2234 if (CALL_P (trial_delay))
2235 maybe_never = 1;
2238 /* If there are slots left to fill and our search was stopped by an
2239 unconditional branch, try the insn at the branch target. We can
2240 redirect the branch if it works.
2242 Don't do this if the insn at the branch target is a branch. */
2243 if (slots_to_fill != slots_filled
2244 && trial
2245 && jump_to_label_p (trial)
2246 && simplejump_p (trial)
2247 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2248 && ! (NONJUMP_INSN_P (next_trial)
2249 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2250 && !JUMP_P (next_trial)
2251 && ! insn_references_resource_p (next_trial, &set, true)
2252 && ! insn_sets_resource_p (next_trial, &set, true)
2253 && ! insn_sets_resource_p (next_trial, &needed, true)
2254 #ifdef HAVE_cc0
2255 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2256 #endif
2257 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2258 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2259 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2260 && ! can_throw_internal (trial))
2262 /* See comment in relax_delay_slots about necessity of using
2263 next_real_insn here. */
2264 rtx_insn *new_label = next_real_insn (next_trial);
2266 if (new_label != 0)
2267 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2268 else
2269 new_label = find_end_label (simple_return_rtx);
2271 if (new_label)
2273 delay_list
2274 = add_to_delay_list (copy_delay_slot_insn (next_trial),
2275 delay_list);
2276 slots_filled++;
2277 reorg_redirect_jump (trial, new_label);
2282 /* If this is an unconditional jump, then try to get insns from the
2283 target of the jump. */
2284 if (JUMP_P (insn)
2285 && simplejump_p (insn)
2286 && slots_filled != slots_to_fill)
2287 delay_list
2288 = fill_slots_from_thread (insn, const_true_rtx,
2289 next_active_insn (JUMP_LABEL (insn)),
2290 NULL, 1, 1,
2291 own_thread_p (JUMP_LABEL (insn),
2292 JUMP_LABEL (insn), 0),
2293 slots_to_fill, &slots_filled,
2294 delay_list);
2296 if (delay_list)
2297 unfilled_slots_base[i]
2298 = emit_delay_sequence (insn, delay_list, slots_filled);
2300 if (slots_to_fill == slots_filled)
2301 unfilled_slots_base[i] = 0;
2303 note_delay_statistics (slots_filled, 0);
2307 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2308 return the ultimate label reached by any such chain of jumps.
2309 Return a suitable return rtx if the chain ultimately leads to a
2310 return instruction.
2311 If LABEL is not followed by a jump, return LABEL.
2312 If the chain loops or we can't find the end, return LABEL,
2313 since that tells the caller to avoid changing the insn.
2314 If the returned label is obtained by following a crossing jump,
2315 set *CROSSING to true, otherwise set it to false. */
2317 static rtx
2318 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2320 rtx_insn *insn;
2321 rtx_insn *next;
2322 int depth;
2324 *crossing = false;
2325 if (ANY_RETURN_P (label))
2326 return label;
2328 rtx_insn *value = as_a <rtx_insn *> (label);
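/* Follow at most 10 jumps; if the chain is longer than that we give up
   after the loop and return LABEL unchanged.  */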
2330 for (depth = 0;
2331 (depth < 10
2332 && (insn = next_active_insn (value)) != 0
2333 && JUMP_P (insn)
2334 && JUMP_LABEL (insn) != NULL_RTX
2335 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2336 || ANY_RETURN_P (PATTERN (insn)))
2337 && (next = NEXT_INSN (insn))
2338 && BARRIER_P (next));
2339 depth++)
2341 rtx this_label_or_return = JUMP_LABEL (insn);
2343 /* If we have found a cycle, make the insn jump to itself. */
2344 if (this_label_or_return == label)
2345 return label;
2347 /* Cannot follow returns and cannot look through tablejumps. */
2348 if (ANY_RETURN_P (this_label_or_return))
2349 return this_label_or_return;
2351 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2352 if (NEXT_INSN (this_label)
2353 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2354 break;
2356 if (!targetm.can_follow_jump (jump, insn))
2357 break;
2358 if (!*crossing)
2359 *crossing = CROSSING_JUMP_P (jump);
2360 value = this_label;
2362 if (depth == 10)
2363 return label;
2364 return value;
2367 /* Try to find insns to place in delay slots.
2369 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2370 or is an unconditional branch if CONDITION is const_true_rtx.
2371 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2373 THREAD is a flow of control: the insns to be executed if the branch is
2374 true or those if the branch is false; THREAD_IF_TRUE says which.
2376 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2377 to see if any potential delay slot insns set things needed there.
2379 LIKELY is nonzero if it is extremely likely that the branch will be
2380 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2381 end of a loop back up to the top.
2383 OWN_THREAD is true if we are the only user of the thread, i.e., it is
2384 the fallthrough code of our jump or the target of the jump when we are
2385 the only jump going there.
2387 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2388 case, we can only take insns from the head of the thread for our delay
2389 slot. We then adjust the jump to point after the insns we have taken. */
2391 static rtx_insn_list *
2392 fill_slots_from_thread (rtx_insn *insn, rtx condition, rtx thread_or_return,
2393 rtx opposite_thread, int likely,
2394 int thread_if_true,
2395 int own_thread, int slots_to_fill,
2396 int *pslots_filled, rtx_insn_list *delay_list)
2398 rtx new_thread;
2399 struct resources opposite_needed, set, needed;
2400 rtx_insn *trial;
2401 int lose = 0;
2402 int must_annul = 0;
2403 int flags;
2405 /* Validate our arguments. */
2406 gcc_assert (condition != const_true_rtx || thread_if_true);
2407 gcc_assert (own_thread || thread_if_true);
2409 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2411 /* If our thread is the end of the subroutine, we can't get any delay
2412 insns from that. */
2413 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2414 return delay_list;
2416 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2418 /* If this is an unconditional branch, nothing is needed at the
2419 opposite thread. Otherwise, compute what is needed there. */
2420 if (condition == const_true_rtx)
2421 CLEAR_RESOURCE (&opposite_needed);
2422 else
2423 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2425 /* If the insn at THREAD can be split, do it here to avoid having to
2426 update THREAD and NEW_THREAD if it is done in the loop below. Also
2427 initialize NEW_THREAD. */
2429 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2431 /* Scan insns at THREAD. We are looking for an insn that can be removed
2432 from THREAD (it neither sets nor references resources that were set
2433 ahead of it and it doesn't set anything needed by the insns ahead of
2434 it) and that either can be placed in an annulling insn or isn't
2435 needed at OPPOSITE_THREAD. */
2437 CLEAR_RESOURCE (&needed);
2438 CLEAR_RESOURCE (&set);
2440 /* If we do not own this thread, we must stop as soon as we find
2441 something that we can't put in a delay slot, since all we can do
2442 is branch into THREAD at a later point. Therefore, labels stop
2443 the search if this is not the `true' thread. */
2445 for (trial = thread;
2446 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2447 trial = next_nonnote_insn (trial))
2449 rtx pat, old_trial;
2451 /* If we have passed a label, we no longer own this thread. */
2452 if (LABEL_P (trial))
2454 own_thread = 0;
2455 continue;
2458 pat = PATTERN (trial);
2459 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2460 continue;
2462 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2463 don't separate or copy insns that set and use CC0. */
2464 if (! insn_references_resource_p (trial, &set, true)
2465 && ! insn_sets_resource_p (trial, &set, true)
2466 && ! insn_sets_resource_p (trial, &needed, true)
2467 #ifdef HAVE_cc0
2468 && ! (reg_mentioned_p (cc0_rtx, pat)
2469 && (! own_thread || ! sets_cc0_p (pat)))
2470 #endif
2471 && ! can_throw_internal (trial))
2473 rtx prior_insn;
2475 /* If TRIAL is redundant with some insn before INSN, we don't
2476 actually need to add it to the delay list; we can merely pretend
2477 we did. */
2478 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2480 fix_reg_dead_note (prior_insn, insn);
2481 if (own_thread)
2483 update_block (trial, thread);
2484 if (trial == thread)
2486 thread = next_active_insn (thread);
2487 if (new_thread == trial)
2488 new_thread = thread;
2491 delete_related_insns (trial);
2493 else
2495 update_reg_unused_notes (prior_insn, trial);
2496 new_thread = next_active_insn (trial);
2499 continue;
2502 /* There are two ways we can win: If TRIAL doesn't set anything
2503 needed at the opposite thread and can't trap, or if it can
2504 go into an annulled delay slot. But we want neither to copy
2505 nor to speculate frame-related insns. */
2506 if (!must_annul
2507 && ((condition == const_true_rtx
2508 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2509 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2510 && ! may_trap_or_fault_p (pat)
2511 && ! RTX_FRAME_RELATED_P (trial))))
2513 old_trial = trial;
2514 trial = try_split (pat, trial, 0);
2515 if (new_thread == old_trial)
2516 new_thread = trial;
2517 if (thread == old_trial)
2518 thread = trial;
2519 pat = PATTERN (trial);
2520 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2521 goto winner;
2523 else if (0
2524 #ifdef ANNUL_IFTRUE_SLOTS
2525 || ! thread_if_true
2526 #endif
2527 #ifdef ANNUL_IFFALSE_SLOTS
2528 || thread_if_true
2529 #endif
2532 old_trial = trial;
2533 trial = try_split (pat, trial, 0);
2534 if (new_thread == old_trial)
2535 new_thread = trial;
2536 if (thread == old_trial)
2537 thread = trial;
2538 pat = PATTERN (trial);
2539 if ((must_annul || delay_list == NULL) && (thread_if_true
2540 ? check_annul_list_true_false (0, delay_list)
2541 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2542 : check_annul_list_true_false (1, delay_list)
2543 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2545 rtx_insn *temp;
2547 must_annul = 1;
2548 winner:
2550 #ifdef HAVE_cc0
2551 if (reg_mentioned_p (cc0_rtx, pat))
2552 link_cc0_insns (trial);
2553 #endif
2555 /* If we own this thread, delete the insn. If this is the
2556 destination of a branch, show that a basic block status
2557 may have been updated. In any case, mark the new
2558 starting point of this thread. */
2559 if (own_thread)
2561 rtx note;
2563 update_block (trial, thread);
2564 if (trial == thread)
2566 thread = next_active_insn (thread);
2567 if (new_thread == trial)
2568 new_thread = thread;
2571 /* We are moving this insn, not deleting it. We must
2572 temporarily increment the use count on any referenced
2573 label lest it be deleted by delete_related_insns. */
2574 for (note = REG_NOTES (trial);
2575 note != NULL_RTX;
2576 note = XEXP (note, 1))
2577 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2578 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2580 /* REG_LABEL_OPERAND could be
2581 NOTE_INSN_DELETED_LABEL too. */
2582 if (LABEL_P (XEXP (note, 0)))
2583 LABEL_NUSES (XEXP (note, 0))++;
2584 else
2585 gcc_assert (REG_NOTE_KIND (note)
2586 == REG_LABEL_OPERAND);
2588 if (jump_to_label_p (trial))
2589 LABEL_NUSES (JUMP_LABEL (trial))++;
2591 delete_related_insns (trial);
2593 for (note = REG_NOTES (trial);
2594 note != NULL_RTX;
2595 note = XEXP (note, 1))
2596 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2597 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2599 /* REG_LABEL_OPERAND could be
2600 NOTE_INSN_DELETED_LABEL too. */
2601 if (LABEL_P (XEXP (note, 0)))
2602 LABEL_NUSES (XEXP (note, 0))--;
2603 else
2604 gcc_assert (REG_NOTE_KIND (note)
2605 == REG_LABEL_OPERAND);
2607 if (jump_to_label_p (trial))
2608 LABEL_NUSES (JUMP_LABEL (trial))--;
2610 else
2611 new_thread = next_active_insn (trial);
2613 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2614 if (thread_if_true)
2615 INSN_FROM_TARGET_P (temp) = 1;
2617 delay_list = add_to_delay_list (temp, delay_list);
2619 if (slots_to_fill == ++(*pslots_filled))
2621 /* Even though we have filled all the slots, we
2622 may be branching to a location that has a
2623 redundant insn. Skip any if so. */
2624 while (new_thread && ! own_thread
2625 && ! insn_sets_resource_p (new_thread, &set, true)
2626 && ! insn_sets_resource_p (new_thread, &needed,
2627 true)
2628 && ! insn_references_resource_p (new_thread,
2629 &set, true)
2630 && (prior_insn
2631 = redundant_insn (new_thread, insn,
2632 delay_list)))
2634 /* We know we do not own the thread, so no need
2635 to call update_block and delete_insn. */
2636 fix_reg_dead_note (prior_insn, insn);
2637 update_reg_unused_notes (prior_insn, new_thread);
2638 new_thread = next_active_insn (new_thread);
2640 break;
2643 continue;
2648 /* This insn can't go into a delay slot. */
2649 lose = 1;
2650 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2651 mark_referenced_resources (trial, &needed, true);
2653 /* Ensure we don't put insns between the setting of cc and the comparison
2654 by moving a setting of cc into an earlier delay slot since these insns
2655 could clobber the condition code. */
2656 set.cc = 1;
2658 /* If this insn is a register-register copy and the next insn has
2659 a use of our destination, change it to use our source. That way,
2660 it will become a candidate for our delay slot the next time
2661 through this loop. This case occurs commonly in loops that
2662 scan a list.
2664 We could check for more complex cases than those tested below,
2665 but it doesn't seem worth it. It might also be a good idea to try
2666 to swap the two insns. That might do better.
2668 We can't do this if the next insn modifies our destination, because
2669 that would make the replacement into the insn invalid. We also can't
2670 do this if it modifies our source, because it might be an earlyclobber
2671 operand. This latter test also prevents updating the contents of
2672 a PRE_INC. We also can't do this if there's overlap of source and
2673 destination. Overlap may happen for larger-than-register-size modes. */
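/* For instance (illustrative register numbers only), given

	(set (reg 3) (reg 2))
	(insn ... (mem (reg 3)) ...)

   the use of (reg 3) in the second insn is rewritten to (reg 2), so the
   copy no longer feeds the insn after it and may become a delay-slot
   candidate the next time around this loop.  */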
2675 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2676 && REG_P (SET_SRC (pat))
2677 && REG_P (SET_DEST (pat))
2678 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2680 rtx next = next_nonnote_insn (trial);
2682 if (next && NONJUMP_INSN_P (next)
2683 && GET_CODE (PATTERN (next)) != USE
2684 && ! reg_set_p (SET_DEST (pat), next)
2685 && ! reg_set_p (SET_SRC (pat), next)
2686 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2687 && ! modified_in_p (SET_DEST (pat), next))
2688 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2692 /* If we stopped on a branch insn that has delay slots, see if we can
2693 steal some of the insns in those slots. */
2694 if (trial && NONJUMP_INSN_P (trial)
2695 && GET_CODE (PATTERN (trial)) == SEQUENCE
2696 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2698 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2699 /* If this is the `true' thread, we will want to follow the jump,
2700 so we can only do this if we have taken everything up to here. */
2701 if (thread_if_true && trial == new_thread)
2703 delay_list
2704 = steal_delay_list_from_target (insn, condition, sequence,
2705 delay_list, &set, &needed,
2706 &opposite_needed, slots_to_fill,
2707 pslots_filled, &must_annul,
2708 &new_thread);
2709 /* If we owned the thread and are told that it branched
2710 elsewhere, make sure we own the thread at the new location. */
2711 if (own_thread && trial != new_thread)
2712 own_thread = own_thread_p (new_thread, new_thread, 0);
2714 else if (! thread_if_true)
2715 delay_list
2716 = steal_delay_list_from_fallthrough (insn, condition,
2717 sequence,
2718 delay_list, &set, &needed,
2719 &opposite_needed, slots_to_fill,
2720 pslots_filled, &must_annul);
2723 /* If we haven't found anything for this delay slot and it is very
2724 likely that the branch will be taken, see if the insn at our target
2725 increments or decrements a register with an increment that does not
2726 depend on the destination register. If so, try to place the opposite
2727 arithmetic insn after the jump insn and put the arithmetic insn in the
2728 delay slot. If we can't do this, return. */
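/* This is the classic loop-closing case: the branch back to the top of
   a loop is almost always taken, and the insn at the loop head is
   typically an induction-variable update such as an increment.  */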
2729 if (delay_list == 0 && likely
2730 && new_thread && !ANY_RETURN_P (new_thread)
2731 && NONJUMP_INSN_P (new_thread)
2732 && !RTX_FRAME_RELATED_P (new_thread)
2733 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2734 && asm_noperands (PATTERN (new_thread)) < 0)
2736 rtx pat = PATTERN (new_thread);
2737 rtx dest;
2738 rtx src;
2740 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2741 above. */
2742 trial = as_a <rtx_insn *> (new_thread);
2743 pat = PATTERN (trial);
2745 if (!NONJUMP_INSN_P (trial)
2746 || GET_CODE (pat) != SET
2747 || ! eligible_for_delay (insn, 0, trial, flags)
2748 || can_throw_internal (trial))
2749 return 0;
2751 dest = SET_DEST (pat), src = SET_SRC (pat);
2752 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2753 && rtx_equal_p (XEXP (src, 0), dest)
2754 && (!FLOAT_MODE_P (GET_MODE (src))
2755 || flag_unsafe_math_optimizations)
2756 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2757 && ! side_effects_p (pat))
2759 rtx other = XEXP (src, 1);
2760 rtx new_arith;
2761 rtx_insn *ninsn;
2763 /* If this is a constant adjustment, use the same code with
2764 the negated constant. Otherwise, reverse the sense of the
2765 arithmetic. */
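/* E.g. if the insn at the target is `d = d + 4', we emit `d = d + -4'
   after the jump; if it is `d = d + r', we emit `d = d - r'.  */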
2766 if (CONST_INT_P (other))
2767 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2768 negate_rtx (GET_MODE (src), other));
2769 else
2770 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2771 GET_MODE (src), dest, other);
2773 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2774 insn);
2776 if (recog_memoized (ninsn) < 0
2777 || (extract_insn (ninsn),
2778 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2780 delete_related_insns (ninsn);
2781 return 0;
2784 if (own_thread)
2786 update_block (trial, thread);
2787 if (trial == thread)
2789 thread = next_active_insn (thread);
2790 if (new_thread == trial)
2791 new_thread = thread;
2793 delete_related_insns (trial);
2795 else
2796 new_thread = next_active_insn (trial);
2798 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2799 if (thread_if_true)
2800 INSN_FROM_TARGET_P (ninsn) = 1;
2802 delay_list = add_to_delay_list (ninsn, NULL);
2803 (*pslots_filled)++;
2807 if (delay_list && must_annul)
2808 INSN_ANNULLED_BRANCH_P (insn) = 1;
2810 /* If we are to branch into the middle of this thread, find an appropriate
2811 label or make a new one if none, and redirect INSN to it. If we hit the
2812 end of the function, use the end-of-function label. */
2813 if (new_thread != thread)
2815 rtx label;
2816 bool crossing = false;
2818 gcc_assert (thread_if_true);
2820 if (new_thread && simplejump_or_return_p (new_thread)
2821 && redirect_with_delay_list_safe_p (insn,
2822 JUMP_LABEL (new_thread),
2823 delay_list))
2824 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2825 &crossing);
2827 if (ANY_RETURN_P (new_thread))
2828 label = find_end_label (new_thread);
2829 else if (LABEL_P (new_thread))
2830 label = new_thread;
2831 else
2832 label = get_label_before (as_a <rtx_insn *> (new_thread),
2833 JUMP_LABEL (insn));
2835 if (label)
2837 reorg_redirect_jump (insn, label);
2838 if (crossing)
2839 CROSSING_JUMP_P (insn) = 1;
2843 return delay_list;
2846 /* Make another attempt to find insns to place in delay slots.
2848 We previously looked for insns located in front of the delay insn
2849 and, for non-jump delay insns, located behind the delay insn.
2851 Here only try to schedule jump insns and try to move insns from either
2852 the target or the following insns into the delay slot. If annulling is
2853 supported, we will be likely to do this. Otherwise, we can do this only
2854 if safe. */
2856 static void
2857 fill_eager_delay_slots (void)
2859 rtx_insn *insn;
2860 int i;
2861 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2863 for (i = 0; i < num_unfilled_slots; i++)
2865 rtx condition;
2866 rtx target_label, insn_at_target;
2867 rtx_insn *fallthrough_insn;
2868 rtx_insn_list *delay_list = 0;
2869 int own_target;
2870 int own_fallthrough;
2871 int prediction, slots_to_fill, slots_filled;
2873 insn = unfilled_slots_base[i];
2874 if (insn == 0
2875 || insn->deleted ()
2876 || !JUMP_P (insn)
2877 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2878 continue;
2880 slots_to_fill = num_delay_slots (insn);
2881 /* Some machine descriptions have defined instructions to have
2882 delay slots only in certain circumstances which may depend on
2883 nearby insns (which change due to reorg's actions).
2885 For example, the PA port normally has delay slots for unconditional
2886 jumps.
2888 However, the PA port claims such jumps do not have a delay slot
2889 if they are immediate successors of certain CALL_INSNs. This
2890 allows the port to favor filling the delay slot of the call with
2891 the unconditional jump. */
2892 if (slots_to_fill == 0)
2893 continue;
2895 slots_filled = 0;
2896 target_label = JUMP_LABEL (insn);
2897 condition = get_branch_condition (insn, target_label);
2899 if (condition == 0)
2900 continue;
2902 /* Get the next active fallthrough and target insns and see if we own
2903 them. Then see whether the branch is likely true. We don't need
2904 to do a lot of this for unconditional branches. */
2906 insn_at_target = first_active_target_insn (target_label);
2907 own_target = own_thread_p (target_label, target_label, 0);
2909 if (condition == const_true_rtx)
2911 own_fallthrough = 0;
2912 fallthrough_insn = 0;
2913 prediction = 2;
2915 else
2917 fallthrough_insn = next_active_insn (insn);
2918 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2919 prediction = mostly_true_jump (insn);
2922 /* If this insn is expected to branch, first try to get insns from our
2923 target, then our fallthrough insns. If it is not expected to branch,
2924 try the other order. */
2926 if (prediction > 0)
2928 delay_list
2929 = fill_slots_from_thread (insn, condition, insn_at_target,
2930 fallthrough_insn, prediction == 2, 1,
2931 own_target,
2932 slots_to_fill, &slots_filled, delay_list);
2934 if (delay_list == 0 && own_fallthrough)
2936 /* Even though we didn't find anything for delay slots,
2937 we might have found a redundant insn which we deleted
2938 from the thread that was filled. So we have to recompute
2939 the next insn at the target. */
2940 target_label = JUMP_LABEL (insn);
2941 insn_at_target = first_active_target_insn (target_label);
2943 delay_list
2944 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2945 insn_at_target, 0, 0,
2946 own_fallthrough,
2947 slots_to_fill, &slots_filled,
2948 delay_list);
2951 else
2953 if (own_fallthrough)
2954 delay_list
2955 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2956 insn_at_target, 0, 0,
2957 own_fallthrough,
2958 slots_to_fill, &slots_filled,
2959 delay_list);
2961 if (delay_list == 0)
2962 delay_list
2963 = fill_slots_from_thread (insn, condition, insn_at_target,
2964 next_active_insn (insn), 0, 1,
2965 own_target,
2966 slots_to_fill, &slots_filled,
2967 delay_list);
2970 if (delay_list)
2971 unfilled_slots_base[i]
2972 = emit_delay_sequence (insn, delay_list, slots_filled);
2974 if (slots_to_fill == slots_filled)
2975 unfilled_slots_base[i] = 0;
2977 note_delay_statistics (slots_filled, 1);
2981 static void delete_computation (rtx insn);
2983 /* Recursively delete prior insns that compute the value (used only by INSN
2984 which the caller is deleting) stored in the register mentioned by NOTE
2985 which is a REG_DEAD note associated with INSN. */
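/* For example, if INSN is the only user of (reg 5) and the insn just
   before it does nothing other than (set (reg 5) ...), that setter can
   be deleted as well, and so on recursively for the insns feeding it.  */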
2987 static void
2988 delete_prior_computation (rtx note, rtx insn)
2990 rtx our_prev;
2991 rtx reg = XEXP (note, 0);
2993 for (our_prev = prev_nonnote_insn (insn);
2994 our_prev && (NONJUMP_INSN_P (our_prev)
2995 || CALL_P (our_prev));
2996 our_prev = prev_nonnote_insn (our_prev))
2998 rtx pat = PATTERN (our_prev);
3000 /* If we reach a CALL which is not calling a const function
3001 or the callee pops the arguments, then give up. */
3002 if (CALL_P (our_prev)
3003 && (! RTL_CONST_CALL_P (our_prev)
3004 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
3005 break;
3007 /* If we reach a SEQUENCE, it is too complex to try to
3008 do anything with it, so give up. We can be run during
3009 and after reorg, so SEQUENCE rtl can legitimately show
3010 up here. */
3011 if (GET_CODE (pat) == SEQUENCE)
3012 break;
3014 if (GET_CODE (pat) == USE
3015 && NONJUMP_INSN_P (XEXP (pat, 0)))
3016 /* reorg creates USEs that look like this. We leave them
3017 alone because reorg needs them for its own purposes. */
3018 break;
3020 if (reg_set_p (reg, pat))
3022 if (side_effects_p (pat) && !CALL_P (our_prev))
3023 break;
3025 if (GET_CODE (pat) == PARALLEL)
3027 /* If we find a SET of something else, we can't
3028 delete the insn. */
3030 int i;
3032 for (i = 0; i < XVECLEN (pat, 0); i++)
3034 rtx part = XVECEXP (pat, 0, i);
3036 if (GET_CODE (part) == SET
3037 && SET_DEST (part) != reg)
3038 break;
3041 if (i == XVECLEN (pat, 0))
3042 delete_computation (our_prev);
3044 else if (GET_CODE (pat) == SET
3045 && REG_P (SET_DEST (pat)))
3047 int dest_regno = REGNO (SET_DEST (pat));
3048 int dest_endregno = END_REGNO (SET_DEST (pat));
3049 int regno = REGNO (reg);
3050 int endregno = END_REGNO (reg);
3052 if (dest_regno >= regno
3053 && dest_endregno <= endregno)
3054 delete_computation (our_prev);
3056 /* We may have a multi-word hard register and some, but not
3057 all, of the words of the register are needed in subsequent
3058 insns. Write REG_UNUSED notes for those parts that were not
3059 needed. */
3060 else if (dest_regno <= regno
3061 && dest_endregno >= endregno)
3063 int i;
3065 add_reg_note (our_prev, REG_UNUSED, reg);
3067 for (i = dest_regno; i < dest_endregno; i++)
3068 if (! find_regno_note (our_prev, REG_UNUSED, i))
3069 break;
3071 if (i == dest_endregno)
3072 delete_computation (our_prev);
3076 break;
3079 /* If PAT references the register that dies here, it is an
3080 additional use. Hence any prior SET isn't dead. However, this
3081 insn becomes the new place for the REG_DEAD note. */
3082 if (reg_overlap_mentioned_p (reg, pat))
3084 XEXP (note, 1) = REG_NOTES (our_prev);
3085 REG_NOTES (our_prev) = note;
3086 break;
3091 /* Delete INSN and recursively delete insns that compute values used only
3092 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3094 Look at all our REG_DEAD notes. If a previous insn does nothing other
3095 than set a register that dies in this insn, we can delete that insn
3096 as well.
3098 On machines with CC0, if CC0 is used in this insn, we may be able to
3099 delete the insn that set it. */
3101 static void
3102 delete_computation (rtx insn)
3104 rtx note, next;
3106 #ifdef HAVE_cc0
3107 if (reg_referenced_p (cc0_rtx, PATTERN (insn)))
3109 rtx prev = prev_nonnote_insn (insn);
3110 /* We assume that at this stage
3111 CC's are always set explicitly
3112 and always immediately before the jump that
3113 will use them. So if the previous insn
3114 exists to set the CC's, delete it
3115 (unless it performs auto-increments, etc.). */
3116 if (prev && NONJUMP_INSN_P (prev)
3117 && sets_cc0_p (PATTERN (prev)))
3119 if (sets_cc0_p (PATTERN (prev)) > 0
3120 && ! side_effects_p (PATTERN (prev)))
3121 delete_computation (prev);
3122 else
3123 /* Otherwise, show that cc0 won't be used. */
3124 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3127 #endif
3129 for (note = REG_NOTES (insn); note; note = next)
3131 next = XEXP (note, 1);
3133 if (REG_NOTE_KIND (note) != REG_DEAD
3134 /* Verify that the REG_NOTE is legitimate. */
3135 || !REG_P (XEXP (note, 0)))
3136 continue;
3138 delete_prior_computation (note, insn);
3141 delete_related_insns (insn);
3144 /* If all INSN does is set the pc, delete it,
3145 and delete the insn that set the condition codes for it
3146 if that's what the previous thing was. */
3148 static void
3149 delete_jump (rtx_insn *insn)
3151 rtx set = single_set (insn);
3153 if (set && GET_CODE (SET_DEST (set)) == PC)
3154 delete_computation (insn);
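/* Return the first label found when scanning backwards from the next
   active insn after X, or NULL if SCAN_LIMIT or the start of the insn
   chain is reached before any label.  Used by relax_delay_slots.  */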
3157 static rtx_insn *
3158 label_before_next_insn (rtx x, rtx scan_limit)
3160 rtx_insn *insn = next_active_insn (x);
3161 while (insn)
3163 insn = PREV_INSN (insn);
3164 if (insn == scan_limit || insn == NULL_RTX)
3165 return NULL;
3166 if (LABEL_P (insn))
3167 break;
3169 return insn;
3173 /* Once we have tried two ways to fill a delay slot, make a pass over the
3174 code to try to improve the results and to do such things as more jump
3175 threading. */
3177 static void
3178 relax_delay_slots (rtx_insn *first)
3180 rtx_insn *insn, *next;
3181 rtx_sequence *pat;
3182 rtx trial;
3183 rtx_insn *delay_insn;
3184 rtx target_label;
3186 /* Look at every JUMP_INSN and see if we can improve it. */
3187 for (insn = first; insn; insn = next)
3189 rtx_insn *other;
3190 bool crossing;
3192 next = next_active_insn (insn);
3194 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3195 the next insn, or jumps to a label that is not the last of a
3196 group of consecutive labels. */
3197 if (JUMP_P (insn)
3198 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3199 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3201 target_label
3202 = skip_consecutive_labels (follow_jumps (target_label, insn,
3203 &crossing));
3204 if (ANY_RETURN_P (target_label))
3205 target_label = find_end_label (target_label);
3207 if (target_label && next_active_insn (target_label) == next
3208 && ! condjump_in_parallel_p (insn))
3210 delete_jump (insn);
3211 continue;
3214 if (target_label && target_label != JUMP_LABEL (insn))
3216 reorg_redirect_jump (insn, target_label);
3217 if (crossing)
3218 CROSSING_JUMP_P (insn) = 1;
3221 /* See if this jump conditionally branches around an unconditional
3222 jump. If so, invert this jump and point it to the target of the
3223 second jump. */
3224 if (next && simplejump_or_return_p (next)
3225 && any_condjump_p (insn)
3226 && target_label
3227 && next_active_insn (target_label) == next_active_insn (next)
3228 && no_labels_between_p (insn, next))
3230 rtx label = JUMP_LABEL (next);
3232 /* Be careful how we do this to avoid deleting code or
3233 labels that are momentarily dead. See similar optimization
3234 in jump.c.
3236 We also need to ensure we properly handle the case when
3237 invert_jump fails. */
3239 ++LABEL_NUSES (target_label);
3240 if (!ANY_RETURN_P (label))
3241 ++LABEL_NUSES (label);
3243 if (invert_jump (insn, label, 1))
3245 delete_related_insns (next);
3246 next = insn;
3249 if (!ANY_RETURN_P (label))
3250 --LABEL_NUSES (label);
3252 if (--LABEL_NUSES (target_label) == 0)
3253 delete_related_insns (target_label);
3255 continue;
3259 /* If this is an unconditional jump and the previous insn is a
3260 conditional jump, try reversing the condition of the previous
3261 insn and swapping our targets. The next pass might be able to
3262 fill the slots.
3264 Don't do this if we expect the conditional branch to be true, because
3265 we would then be making the more common case longer. */
3267 if (simplejump_or_return_p (insn)
3268 && (other = prev_active_insn (insn)) != 0
3269 && any_condjump_p (other)
3270 && no_labels_between_p (other, insn)
3271 && 0 > mostly_true_jump (other))
3273 rtx other_target = JUMP_LABEL (other);
3274 target_label = JUMP_LABEL (insn);
3276 if (invert_jump (other, target_label, 0))
3277 reorg_redirect_jump (insn, other_target);
3280 /* Now look only at cases where we have a filled delay slot. */
3281 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3282 continue;
3284 pat = as_a <rtx_sequence *> (PATTERN (insn));
3285 delay_insn = pat->insn (0);
3287 /* See if the first insn in the delay slot is redundant with some
3288 previous insn. Remove it from the delay slot if so; then set up
3289 to reprocess this insn. */
3290 if (redundant_insn (pat->insn (1), delay_insn, 0))
3292 update_block (pat->insn (1), insn);
3293 delete_from_delay_slot (pat->insn (1));
3294 next = prev_active_insn (next);
3295 continue;
3298 /* See if we have a RETURN insn with a filled delay slot followed
3299 by a RETURN insn with an unfilled delay slot. If so, we can delete
3300 the first RETURN (but not its delay insn). This gives the same
3301 effect in fewer instructions.
3303 Only do so if optimizing for size since this results in slower, but
3304 smaller code. */
3305 if (optimize_function_for_size_p (cfun)
3306 && ANY_RETURN_P (PATTERN (delay_insn))
3307 && next
3308 && JUMP_P (next)
3309 && PATTERN (next) == PATTERN (delay_insn))
3311 rtx_insn *after;
3312 int i;
3314 /* Delete the RETURN and just execute the delay list insns.
3316 We do this by deleting the INSN containing the SEQUENCE, then
3317 re-emitting the insns separately, and then deleting the RETURN.
3318 This allows the count of the jump target to be properly
3319 decremented.
3321 Note that we need to change the INSN_UID of the re-emitted insns
3322 since it is used to hash the insns for mark_target_live_regs and
3323 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3325 Clear the from target bit, since these insns are no longer
3326 in delay slots. */
3327 for (i = 0; i < XVECLEN (pat, 0); i++)
3328 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3330 trial = PREV_INSN (insn);
3331 delete_related_insns (insn);
3332 gcc_assert (GET_CODE (pat) == SEQUENCE);
3333 add_insn_after (delay_insn, trial, NULL);
3334 after = delay_insn;
3335 for (i = 1; i < pat->len (); i++)
3336 after = emit_copy_of_insn_after (pat->insn (i), after);
3337 delete_scheduled_jump (delay_insn);
3338 continue;
3341 /* Now look only at the cases where we have a filled JUMP_INSN. */
3342 if (!JUMP_P (delay_insn)
3343 || !(condjump_p (delay_insn) || condjump_in_parallel_p (delay_insn)))
3344 continue;
3346 target_label = JUMP_LABEL (delay_insn);
3347 if (target_label && ANY_RETURN_P (target_label))
3348 continue;
3350 /* If this jump goes to another unconditional jump, thread it, but
3351 don't convert a jump into a RETURN here. */
3352 trial = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
3353 &crossing));
3354 if (ANY_RETURN_P (trial))
3355 trial = find_end_label (trial);
3357 if (trial && trial != target_label
3358 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3360 reorg_redirect_jump (delay_insn, trial);
3361 target_label = trial;
3362 if (crossing)
3363 CROSSING_JUMP_P (insn) = 1;
3366 /* If the first insn at TARGET_LABEL is redundant with a previous
3367 insn, redirect the jump to the following insn and process again.
3368 We use next_real_insn instead of next_active_insn so we
3369 don't skip USE-markers, or we'll end up with incorrect
3370 liveness info. */
3371 trial = next_real_insn (target_label);
3372 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3373 && redundant_insn (trial, insn, 0)
3374 && ! can_throw_internal (trial))
3376 /* Figure out where to emit the special USE insn so we don't
3377 later incorrectly compute register live/death info. */
3378 rtx_insn *tmp = next_active_insn (trial);
3379 if (tmp == 0)
3380 tmp = find_end_label (simple_return_rtx);
3382 if (tmp)
3384 /* Insert the special USE insn and update dataflow info.
3385 We know "trial" is an insn here as it is the output of
3386 next_real_insn () above. */
3387 update_block (as_a <rtx_insn *> (trial), tmp);
3389 /* Now emit a label before the special USE insn, and
3390 redirect our jump to the new label. */
3391 target_label = get_label_before (PREV_INSN (tmp), target_label);
3392 reorg_redirect_jump (delay_insn, target_label);
3393 next = insn;
3394 continue;
3398 /* Similarly, if it is an unconditional jump with one insn in its
3399 delay list and that insn is redundant, thread the jump. */
3400 rtx_sequence *trial_seq =
3401 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3402 if (trial_seq
3403 && trial_seq->len () == 2
3404 && JUMP_P (trial_seq->insn (0))
3405 && simplejump_or_return_p (trial_seq->insn (0))
3406 && redundant_insn (trial_seq->insn (1), insn, 0))
3408 target_label = JUMP_LABEL (trial_seq->insn (0));
3409 if (ANY_RETURN_P (target_label))
3410 target_label = find_end_label (target_label);
3412 if (target_label
3413 && redirect_with_delay_slots_safe_p (delay_insn, target_label,
3414 insn))
3416 update_block (trial_seq->insn (1), insn);
3417 reorg_redirect_jump (delay_insn, target_label);
3418 next = insn;
3419 continue;
3423 /* See if we have a simple (conditional) jump that is useless. */
3424 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3425 && ! condjump_in_parallel_p (delay_insn)
3426 && prev_active_insn (target_label) == insn
3427 && ! BARRIER_P (prev_nonnote_insn (target_label))
3428 #ifdef HAVE_cc0
3429 /* If the last insn in the delay slot sets CC0 for some insn,
3430 various code assumes that it is in a delay slot. We could
3431 put it back where it belonged and delete the register notes,
3432 but it doesn't seem worthwhile in this uncommon case. */
3433 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3434 REG_CC_USER, NULL_RTX)
3435 #endif
3438 rtx_insn *after;
3439 int i;
3441 /* All this insn does is execute its delay list and jump to the
3442 following insn. So delete the jump and just execute the delay
3443 list insns.
3445 We do this by deleting the INSN containing the SEQUENCE, then
3446 re-emitting the insns separately, and then deleting the jump.
3447 This allows the count of the jump target to be properly
3448 decremented.
3450 Note that we need to change the INSN_UID of the re-emitted insns
3451 since it is used to hash the insns for mark_target_live_regs and
3452 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3454 Clear the from target bit, since these insns are no longer
3455 in delay slots. */
3456 for (i = 0; i < XVECLEN (pat, 0); i++)
3457 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3459 trial = PREV_INSN (insn);
3460 delete_related_insns (insn);
3461 gcc_assert (GET_CODE (pat) == SEQUENCE);
3462 add_insn_after (delay_insn, trial, NULL);
3463 after = delay_insn;
3464 for (i = 1; i < pat->len (); i++)
3465 after = emit_copy_of_insn_after (pat->insn (i), after);
3466 delete_scheduled_jump (delay_insn);
3467 continue;
3470 /* See if this is an unconditional jump around a single insn which is
3471 identical to the one in its delay slot. In this case, we can just
3472 delete the branch and the insn in its delay slot. */
3473 if (next && NONJUMP_INSN_P (next)
3474 && label_before_next_insn (next, insn) == target_label
3475 && simplejump_p (insn)
3476 && XVECLEN (pat, 0) == 2
3477 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3479 delete_related_insns (insn);
3480 continue;
3483 /* See if this jump (with its delay slots) conditionally branches
3484 around an unconditional jump (without delay slots). If so, invert
3485 this jump and point it to the target of the second jump. We cannot
3486 do this for annulled jumps, though. Again, don't convert a jump to
3487 a RETURN here. */
3488 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3489 && any_condjump_p (delay_insn)
3490 && next && simplejump_or_return_p (next)
3491 && next_active_insn (target_label) == next_active_insn (next)
3492 && no_labels_between_p (insn, next))
3494 rtx label = JUMP_LABEL (next);
3495 rtx old_label = JUMP_LABEL (delay_insn);
3497 if (ANY_RETURN_P (label))
3498 label = find_end_label (label);
3500 /* find_end_label can generate a new label. Check this first. */
3501 if (label
3502 && no_labels_between_p (insn, next)
3503 && redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3505 /* Be careful how we do this to avoid deleting code or labels
3506 that are momentarily dead. See similar optimization in
3507 jump.c */
3508 if (old_label)
3509 ++LABEL_NUSES (old_label);
3511 if (invert_jump (delay_insn, label, 1))
3513 int i;
3515 /* Must update the INSN_FROM_TARGET_P bits now that
3516 the branch is reversed, so that mark_target_live_regs
3517 will handle the delay slot insn correctly. */
3518 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3520 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3521 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3524 delete_related_insns (next);
3525 next = insn;
3528 if (old_label && --LABEL_NUSES (old_label) == 0)
3529 delete_related_insns (old_label);
3530 continue;
3534 /* If we own the thread opposite the way this insn branches, see if we
3535 can merge its delay slots with following insns. */
3536 if (INSN_FROM_TARGET_P (pat->insn (1))
3537 && own_thread_p (NEXT_INSN (insn), 0, 1))
3538 try_merge_delay_insns (insn, next);
3539 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3540 && own_thread_p (target_label, target_label, 0))
3541 try_merge_delay_insns (insn, next_active_insn (target_label));
3543 /* If we get here, we haven't deleted INSN. But we may have deleted
3544 NEXT, so recompute it. */
3545 next = next_active_insn (insn);
3550 /* Look for filled jumps to the end of function label. We can try to convert
3551 them into RETURN insns if the insns in the delay slot are valid for the
3552 RETURN as well. */
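/* For illustration only (schematic RTL, not from any particular target):
   a filled jump to the end-of-function label

       (sequence [ (jump_insn (set (pc) (label_ref L_ret)))
                   (insn ...)   ;; delay slot filler
                 ])
       ...
     L_ret:
       (jump_insn (return))

   can be rewritten so that the filled jump itself becomes the RETURN,
   provided its filler is also valid in a RETURN's delay slot.  */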
3554 static void
3555 make_return_insns (rtx_insn *first)
3557 rtx_insn *insn;
3558 rtx_insn *jump_insn;
3559 rtx real_return_label = function_return_label;
3560 rtx real_simple_return_label = function_simple_return_label;
3561 int slots, i;
3563 /* See if there is a RETURN insn in the function other than the one we
3564 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3565 into a RETURN to jump to it. */
3566 for (insn = first; insn; insn = NEXT_INSN (insn))
3567 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3569 rtx t = get_label_before (insn, NULL_RTX);
3570 if (PATTERN (insn) == ret_rtx)
3571 real_return_label = t;
3572 else
3573 real_simple_return_label = t;
3574 break;
3577 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3578 was equal to END_OF_FUNCTION_LABEL. */
3579 if (real_return_label)
3580 LABEL_NUSES (real_return_label)++;
3581 if (real_simple_return_label)
3582 LABEL_NUSES (real_simple_return_label)++;
3584 /* Clear the list of insns to fill so we can use it. */
3585 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3587 for (insn = first; insn; insn = NEXT_INSN (insn))
3589 int flags;
3590 rtx kind, real_label;
3592 /* Only look at filled JUMP_INSNs that go to the end-of-function
3593 label. */
3594 if (!NONJUMP_INSN_P (insn))
3595 continue;
3597 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3598 continue;
3600 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3602 if (!jump_to_label_p (pat->insn (0)))
3603 continue;
3605 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3607 kind = ret_rtx;
3608 real_label = real_return_label;
3610 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3612 kind = simple_return_rtx;
3613 real_label = real_simple_return_label;
3615 else
3616 continue;
3618 jump_insn = pat->insn (0);
3620 /* If we can't make the jump into a RETURN, try to redirect it to the best
3621 RETURN and go on to the next insn. */
3622 if (!reorg_redirect_jump (jump_insn, kind))
3624 /* Make sure redirecting the jump will not invalidate the delay
3625 slot insns. */
3626 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3627 reorg_redirect_jump (jump_insn, real_label);
3628 continue;
3631 /* See if this RETURN can accept the insns currently in its delay slot.
3632 It can if it has at least as many slots and the contents of each
3633 are valid. */
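/* (These are the same tests used when the slots were originally filled:
   eligible_for_delay, or the eligible_for_annul_* variants when the
   target's define_delay allows annulling, i.e. when ANNUL_IFTRUE_SLOTS
   or ANNUL_IFFALSE_SLOTS is defined.)  */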
3635 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3636 slots = num_delay_slots (jump_insn);
3637 if (slots >= XVECLEN (pat, 0) - 1)
3639 for (i = 1; i < XVECLEN (pat, 0); i++)
3640 if (! (
3641 #ifdef ANNUL_IFFALSE_SLOTS
3642 (INSN_ANNULLED_BRANCH_P (jump_insn)
3643 && INSN_FROM_TARGET_P (pat->insn (i)))
3644 ? eligible_for_annul_false (jump_insn, i - 1,
3645 pat->insn (i), flags) :
3646 #endif
3647 #ifdef ANNUL_IFTRUE_SLOTS
3648 (INSN_ANNULLED_BRANCH_P (jump_insn)
3649 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3650 ? eligible_for_annul_true (jump_insn, i - 1,
3651 pat->insn (i), flags) :
3652 #endif
3653 eligible_for_delay (jump_insn, i - 1,
3654 pat->insn (i), flags)))
3655 break;
3657 else
3658 i = 0;
3660 if (i == XVECLEN (pat, 0))
3661 continue;
3663 /* We have to do something with this insn. If it is an unconditional
3664 RETURN, delete the SEQUENCE and output the individual insns,
3665 followed by the RETURN. Then set things up so we try to find
3666 insns for its delay slots, if it needs some. */
3667 if (ANY_RETURN_P (PATTERN (jump_insn)))
3669 rtx_insn *prev = PREV_INSN (insn);
3671 delete_related_insns (insn);
3672 for (i = 1; i < XVECLEN (pat, 0); i++)
3673 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3675 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3676 emit_barrier_after (insn);
3678 if (slots)
3679 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3681 else
3682 /* It is probably more efficient to keep this with its current
3683 delay slot as a branch to a RETURN. */
3684 reorg_redirect_jump (jump_insn, real_label);
3687 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3688 new delay slots we have created. */
3689 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3690 delete_related_insns (real_return_label);
3691 if (real_simple_return_label != NULL_RTX
3692 && --LABEL_NUSES (real_simple_return_label) == 0)
3693 delete_related_insns (real_simple_return_label);
3695 fill_simple_delay_slots (1);
3696 fill_simple_delay_slots (0);
3699 /* Try to find insns to place in delay slots. */
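/* This is the driver for the pass: it numbers the insns (uid_to_ruid),
   collects the insns that want delay slots on unfilled_slots_obstack,
   runs the simple and eager fillers plus relax_delay_slots for
   MAX_REORG_PASSES iterations, optionally turns jumps to the
   end-of-function label into RETURNs, and finally cleans up and emits
   the dump-file statistics.  */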
3701 static void
3702 dbr_schedule (rtx_insn *first)
3704 rtx_insn *insn, *next, *epilogue_insn = 0;
3705 int i;
3706 bool need_return_insns;
3708 /* If the current function has no insns other than the prologue and
3709 epilogue, then do not try to fill any delay slots. */
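/* (NUM_FIXED_BLOCKS accounts for just the entry and exit blocks, so this
   test is true exactly when the function body is empty.)  */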
3710 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3711 return;
3713 /* Find the highest INSN_UID and allocate and initialize our map from
3714 INSN_UID's to position in code. */
3715 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3717 if (INSN_UID (insn) > max_uid)
3718 max_uid = INSN_UID (insn);
3719 if (NOTE_P (insn)
3720 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3721 epilogue_insn = insn;
3724 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3725 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3726 uid_to_ruid[INSN_UID (insn)] = i;
3728 /* Initialize the list of insns that need filling. */
3729 if (unfilled_firstobj == 0)
3731 gcc_obstack_init (&unfilled_slots_obstack);
3732 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3735 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3737 rtx target;
3739 /* Skip vector tables. We can't get attributes for them. */
3740 if (JUMP_TABLE_DATA_P (insn))
3741 continue;
3743 if (JUMP_P (insn))
3744 INSN_ANNULLED_BRANCH_P (insn) = 0;
3745 INSN_FROM_TARGET_P (insn) = 0;
3747 if (num_delay_slots (insn) > 0)
3748 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3750 /* Ensure all jumps go to the last of a set of consecutive labels. */
3751 if (JUMP_P (insn)
3752 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3753 && !ANY_RETURN_P (JUMP_LABEL (insn))
3754 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3755 != JUMP_LABEL (insn)))
3756 redirect_jump (insn, target, 1);
3759 init_resource_info (epilogue_insn);
3761 /* Show we haven't computed an end-of-function label yet. */
3762 function_return_label = function_simple_return_label = NULL;
3764 /* Initialize the statistics for this function. */
3765 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3766 memset (num_filled_delays, 0, sizeof num_filled_delays);
3768 /* Now do the delay slot filling. Try everything twice in case earlier
3769 changes make more slots fillable. */
3771 for (reorg_pass_number = 0;
3772 reorg_pass_number < MAX_REORG_PASSES;
3773 reorg_pass_number++)
3775 fill_simple_delay_slots (1);
3776 fill_simple_delay_slots (0);
3777 fill_eager_delay_slots ();
3778 relax_delay_slots (first);
3781 /* If we made an end-of-function label, indicate that it is now
3782 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3783 If it is now unused, delete it. */
3784 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3785 delete_related_insns (function_return_label);
3786 if (function_simple_return_label
3787 && --LABEL_NUSES (function_simple_return_label) == 0)
3788 delete_related_insns (function_simple_return_label);
3790 need_return_insns = false;
3791 #ifdef HAVE_return
3792 need_return_insns |= HAVE_return && function_return_label != 0;
3793 #endif
3794 #ifdef HAVE_simple_return
3795 need_return_insns |= HAVE_simple_return && function_simple_return_label != 0;
3796 #endif
3797 if (need_return_insns)
3798 make_return_insns (first);
3800 /* Delete any USE insns made by update_block; subsequent passes don't need
3801 them or know how to deal with them. */
3802 for (insn = first; insn; insn = next)
3804 next = NEXT_INSN (insn);
3806 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3807 && INSN_P (XEXP (PATTERN (insn), 0)))
3808 next = delete_related_insns (insn);
3811 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3813 /* It is not clear why the line below is needed, but it does seem to be. */
3814 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3816 if (dump_file)
3818 int i, j, need_comma;
3819 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3820 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3822 for (reorg_pass_number = 0;
3823 reorg_pass_number < MAX_REORG_PASSES;
3824 reorg_pass_number++)
3826 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3827 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3829 need_comma = 0;
3830 fprintf (dump_file, ";; Reorg function #%d\n", i);
3832 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3833 num_insns_needing_delays[i][reorg_pass_number]);
3835 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3836 if (num_filled_delays[i][j][reorg_pass_number])
3838 if (need_comma)
3839 fprintf (dump_file, ", ");
3840 need_comma = 1;
3841 fprintf (dump_file, "%d got %d delays",
3842 num_filled_delays[i][j][reorg_pass_number], j);
3844 fprintf (dump_file, "\n");
3847 memset (total_delay_slots, 0, sizeof total_delay_slots);
3848 memset (total_annul_slots, 0, sizeof total_annul_slots);
3849 for (insn = first; insn; insn = NEXT_INSN (insn))
3851 if (! insn->deleted ()
3852 && NONJUMP_INSN_P (insn)
3853 && GET_CODE (PATTERN (insn)) != USE
3854 && GET_CODE (PATTERN (insn)) != CLOBBER)
3856 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3858 rtx control;
3859 j = XVECLEN (PATTERN (insn), 0) - 1;
3860 if (j > MAX_DELAY_HISTOGRAM)
3861 j = MAX_DELAY_HISTOGRAM;
3862 control = XVECEXP (PATTERN (insn), 0, 0);
3863 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3864 total_annul_slots[j]++;
3865 else
3866 total_delay_slots[j]++;
3868 else if (num_delay_slots (insn) > 0)
3869 total_delay_slots[0]++;
3872 fprintf (dump_file, ";; Reorg totals: ");
3873 need_comma = 0;
3874 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3876 if (total_delay_slots[j])
3878 if (need_comma)
3879 fprintf (dump_file, ", ");
3880 need_comma = 1;
3881 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3884 fprintf (dump_file, "\n");
3885 #if defined (ANNUL_IFTRUE_SLOTS) || defined (ANNUL_IFFALSE_SLOTS)
3886 fprintf (dump_file, ";; Reorg annuls: ");
3887 need_comma = 0;
3888 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3890 if (total_annul_slots[j])
3892 if (need_comma)
3893 fprintf (dump_file, ", ");
3894 need_comma = 1;
3895 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3898 fprintf (dump_file, "\n");
3899 #endif
3900 fprintf (dump_file, "\n");
3903 if (!sibling_labels.is_empty ())
3905 update_alignments (sibling_labels);
3906 sibling_labels.release ();
3909 free_resource_info ();
3910 free (uid_to_ruid);
3911 crtl->dbr_scheduled_p = true;
3913 #endif /* DELAY_SLOTS */
3915 /* Run delay slot optimization. */
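/* DELAY_SLOTS is defined (in the generated insn-attr.h) only for targets
   whose machine description contains define_delay, so elsewhere this
   pass compiles to a no-op.  */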
3916 static unsigned int
3917 rest_of_handle_delay_slots (void)
3919 #ifdef DELAY_SLOTS
3920 dbr_schedule (get_insns ());
3921 #endif
3922 return 0;
3925 namespace {
3927 const pass_data pass_data_delay_slots =
3929 RTL_PASS, /* type */
3930 "dbr", /* name */
3931 OPTGROUP_NONE, /* optinfo_flags */
3932 TV_DBR_SCHED, /* tv_id */
3933 0, /* properties_required */
3934 0, /* properties_provided */
3935 0, /* properties_destroyed */
3936 0, /* todo_flags_start */
3937 0, /* todo_flags_finish */
3940 class pass_delay_slots : public rtl_opt_pass
3942 public:
3943 pass_delay_slots (gcc::context *ctxt)
3944 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3947 /* opt_pass methods: */
3948 virtual bool gate (function *);
3949 virtual unsigned int execute (function *)
3951 return rest_of_handle_delay_slots ();
3954 }; // class pass_delay_slots
3956 bool
3957 pass_delay_slots::gate (function *)
3959 #ifdef DELAY_SLOTS
3960 /* At -O0 dataflow info isn't updated after RA. */
3961 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3962 #else
3963 return 0;
3964 #endif
3967 } // anon namespace
3969 rtl_opt_pass *
3970 make_pass_delay_slots (gcc::context *ctxt)
3972 return new pass_delay_slots (ctxt);
3975 /* Machine dependent reorg pass. */
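/* targetm.machine_dependent_reorg is the TARGET_MACHINE_DEPENDENT_REORG
   hook; the pass below simply invokes it when the target provides one,
   as its gate checks.  */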
3977 namespace {
3979 const pass_data pass_data_machine_reorg =
3981 RTL_PASS, /* type */
3982 "mach", /* name */
3983 OPTGROUP_NONE, /* optinfo_flags */
3984 TV_MACH_DEP, /* tv_id */
3985 0, /* properties_required */
3986 0, /* properties_provided */
3987 0, /* properties_destroyed */
3988 0, /* todo_flags_start */
3989 0, /* todo_flags_finish */
3992 class pass_machine_reorg : public rtl_opt_pass
3994 public:
3995 pass_machine_reorg (gcc::context *ctxt)
3996 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3999 /* opt_pass methods: */
4000 virtual bool gate (function *)
4002 return targetm.machine_dependent_reorg != 0;
4005 virtual unsigned int execute (function *)
4007 targetm.machine_dependent_reorg ();
4008 return 0;
4011 }; // class pass_machine_reorg
4013 } // anon namespace
4015 rtl_opt_pass *
4016 make_pass_machine_reorg (gcc::context *ctxt)
4018 return new pass_machine_reorg (ctxt);