/* [web-scrape artifact — git viewer header, kept for provenance]
   ChangeLog fragment: c-c++-common/ubsan/float-cast-overflow-6.c: Add i?86-*-* target.
   [official-gcc.git] / gcc / reorg.c
   blob 6ade95cbe6dc46bc3ea6059b3cd698d743acbe4b  */
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2014 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "tm.h"
107 #include "diagnostic-core.h"
108 #include "rtl.h"
109 #include "tm_p.h"
110 #include "expr.h"
111 #include "hashtab.h"
112 #include "hash-set.h"
113 #include "vec.h"
114 #include "machmode.h"
115 #include "hard-reg-set.h"
116 #include "input.h"
117 #include "function.h"
118 #include "insn-config.h"
119 #include "conditions.h"
120 #include "predict.h"
121 #include "dominance.h"
122 #include "cfg.h"
123 #include "basic-block.h"
124 #include "regs.h"
125 #include "recog.h"
126 #include "flags.h"
127 #include "obstack.h"
128 #include "insn-attr.h"
129 #include "resource.h"
130 #include "except.h"
131 #include "params.h"
132 #include "target.h"
133 #include "tree-pass.h"
134 #include "emit-rtl.h"
136 #ifdef DELAY_SLOTS
138 #ifndef ANNUL_IFTRUE_SLOTS
139 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
140 #endif
141 #ifndef ANNUL_IFFALSE_SLOTS
142 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
143 #endif
146 /* First, some functions that were used before GCC got a control flow graph.
147 These functions are now only used here in reorg.c, and have therefore
148 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
150 /* Return the last label to mark the same position as LABEL. Return LABEL
151 itself if it is null or any return rtx. */
153 static rtx
154 skip_consecutive_labels (rtx label_or_return)
156 rtx_insn *insn;
158 if (label_or_return && ANY_RETURN_P (label_or_return))
159 return label_or_return;
161 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
163 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
164 if (LABEL_P (insn))
165 label = insn;
167 return label;
#ifdef HAVE_cc0
/* INSN uses CC0 and is being moved into a delay slot.  Set up REG_CC_SETTER
   and REG_CC_USER notes so we can find it.  */

static void
link_cc0_insns (rtx insn)
{
  rtx user = next_nonnote_insn (insn);

  /* If the user has been packed into a delay-slot SEQUENCE, the real
     consumer is the first element of that sequence.  */
  if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
    user = XVECEXP (PATTERN (user), 0, 0);

  /* Cross-link setter and user so either can be found from the other.  */
  add_reg_note (user, REG_CC_SETTER, insn);
  add_reg_note (insn, REG_CC_USER, user);
}
#endif
187 /* Insns which have delay slots that have not yet been filled. */
189 static struct obstack unfilled_slots_obstack;
190 static rtx *unfilled_firstobj;
192 /* Define macros to refer to the first and last slot containing unfilled
193 insns. These are used because the list may move and its address
194 should be recomputed at each use. */
196 #define unfilled_slots_base \
197 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
199 #define unfilled_slots_next \
200 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
202 /* Points to the label before the end of the function, or before a
203 return insn. */
204 static rtx_code_label *function_return_label;
205 /* Likewise for a simple_return. */
206 static rtx_code_label *function_simple_return_label;
208 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
209 not always monotonically increase. */
210 static int *uid_to_ruid;
212 /* Highest valid index in `uid_to_ruid'. */
213 static int max_uid;
215 static int stop_search_p (rtx, int);
216 static int resource_conflicts_p (struct resources *, struct resources *);
217 static int insn_references_resource_p (rtx, struct resources *, bool);
218 static int insn_sets_resource_p (rtx, struct resources *, bool);
219 static rtx_code_label *find_end_label (rtx);
220 static rtx_insn *emit_delay_sequence (rtx_insn *, rtx_insn_list *, int);
221 static rtx_insn_list *add_to_delay_list (rtx_insn *, rtx_insn_list *);
222 static rtx_insn *delete_from_delay_slot (rtx_insn *);
223 static void delete_scheduled_jump (rtx_insn *);
224 static void note_delay_statistics (int, int);
225 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
226 static rtx_insn_list *optimize_skip (rtx_insn *);
227 #endif
228 static int get_jump_flags (const rtx_insn *, rtx);
229 static int mostly_true_jump (rtx);
230 static rtx get_branch_condition (const rtx_insn *, rtx);
231 static int condition_dominates_p (rtx, const rtx_insn *);
232 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
233 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx, rtx_insn_list *);
234 static int check_annul_list_true_false (int, rtx);
235 static rtx_insn_list *steal_delay_list_from_target (rtx_insn *, rtx,
236 rtx_sequence *,
237 rtx_insn_list *,
238 struct resources *,
239 struct resources *,
240 struct resources *,
241 int, int *, int *,
242 rtx *);
243 static rtx_insn_list *steal_delay_list_from_fallthrough (rtx_insn *, rtx,
244 rtx_sequence *,
245 rtx_insn_list *,
246 struct resources *,
247 struct resources *,
248 struct resources *,
249 int, int *, int *);
250 static void try_merge_delay_insns (rtx, rtx_insn *);
251 static rtx redundant_insn (rtx, rtx_insn *, rtx);
252 static int own_thread_p (rtx, rtx, int);
253 static void update_block (rtx_insn *, rtx);
254 static int reorg_redirect_jump (rtx_insn *, rtx);
255 static void update_reg_dead_notes (rtx, rtx);
256 static void fix_reg_dead_note (rtx, rtx);
257 static void update_reg_unused_notes (rtx, rtx);
258 static void fill_simple_delay_slots (int);
259 static rtx_insn_list *fill_slots_from_thread (rtx_insn *, rtx, rtx, rtx,
260 int, int, int, int,
261 int *, rtx_insn_list *);
262 static void fill_eager_delay_slots (void);
263 static void relax_delay_slots (rtx_insn *);
264 static void make_return_insns (rtx_insn *);
266 /* A wrapper around next_active_insn which takes care to return ret_rtx
267 unchanged. */
269 static rtx
270 first_active_target_insn (rtx insn)
272 if (ANY_RETURN_P (insn))
273 return insn;
274 return next_active_insn (as_a <rtx_insn *> (insn));
277 /* Return true iff INSN is a simplejump, or any kind of return insn. */
279 static bool
280 simplejump_or_return_p (rtx insn)
282 return (JUMP_P (insn)
283 && (simplejump_p (as_a <rtx_insn *> (insn))
284 || ANY_RETURN_P (PATTERN (insn))));
287 /* Return TRUE if this insn should stop the search for insn to fill delay
288 slots. LABELS_P indicates that labels should terminate the search.
289 In all cases, jumps terminate the search. */
291 static int
292 stop_search_p (rtx insn, int labels_p)
294 if (insn == 0)
295 return 1;
297 /* If the insn can throw an exception that is caught within the function,
298 it may effectively perform a jump from the viewpoint of the function.
299 Therefore act like for a jump. */
300 if (can_throw_internal (insn))
301 return 1;
303 switch (GET_CODE (insn))
305 case NOTE:
306 case CALL_INSN:
307 return 0;
309 case CODE_LABEL:
310 return labels_p;
312 case JUMP_INSN:
313 case BARRIER:
314 return 1;
316 case INSN:
317 /* OK unless it contains a delay slot or is an `asm' insn of some type.
318 We don't know anything about these. */
319 return (GET_CODE (PATTERN (insn)) == SEQUENCE
320 || GET_CODE (PATTERN (insn)) == ASM_INPUT
321 || asm_noperands (PATTERN (insn)) >= 0);
323 default:
324 gcc_unreachable ();
328 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
329 resource set contains a volatile memory reference. Otherwise, return FALSE. */
331 static int
332 resource_conflicts_p (struct resources *res1, struct resources *res2)
334 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
335 || res1->volatil || res2->volatil)
336 return 1;
338 return hard_reg_set_intersect_p (res1->regs, res2->regs);
341 /* Return TRUE if any resource marked in RES, a `struct resources', is
342 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
343 routine is using those resources.
345 We compute this by computing all the resources referenced by INSN and
346 seeing if this conflicts with RES. It might be faster to directly check
347 ourselves, and this is the way it used to work, but it means duplicating
348 a large block of complex code. */
350 static int
351 insn_references_resource_p (rtx insn, struct resources *res,
352 bool include_delayed_effects)
354 struct resources insn_res;
356 CLEAR_RESOURCE (&insn_res);
357 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
358 return resource_conflicts_p (&insn_res, res);
361 /* Return TRUE if INSN modifies resources that are marked in RES.
362 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
363 included. CC0 is only modified if it is explicitly set; see comments
364 in front of mark_set_resources for details. */
366 static int
367 insn_sets_resource_p (rtx insn, struct resources *res,
368 bool include_delayed_effects)
370 struct resources insn_sets;
372 CLEAR_RESOURCE (&insn_sets);
373 mark_set_resources (insn, &insn_sets, 0,
374 (include_delayed_effects
375 ? MARK_SRC_DEST_CALL
376 : MARK_SRC_DEST));
377 return resource_conflicts_p (&insn_sets, res);
380 /* Find a label at the end of the function or before a RETURN. If there
381 is none, try to make one. If that fails, returns 0.
383 The property of such a label is that it is placed just before the
384 epilogue or a bare RETURN insn, so that another bare RETURN can be
385 turned into a jump to the label unconditionally. In particular, the
386 label cannot be placed before a RETURN insn with a filled delay slot.
388 ??? There may be a problem with the current implementation. Suppose
389 we start with a bare RETURN insn and call find_end_label. It may set
390 function_return_label just before the RETURN. Suppose the machinery
391 is able to fill the delay slot of the RETURN insn afterwards. Then
392 function_return_label is no longer valid according to the property
393 described above and find_end_label will still return it unmodified.
394 Note that this is probably mitigated by the following observation:
395 once function_return_label is made, it is very likely the target of
396 a jump, so filling the delay slot of the RETURN will be much more
397 difficult.
398 KIND is either simple_return_rtx or ret_rtx, indicating which type of
399 return we're looking for. */
401 static rtx_code_label *
402 find_end_label (rtx kind)
404 rtx_insn *insn;
405 rtx_code_label **plabel;
407 if (kind == ret_rtx)
408 plabel = &function_return_label;
409 else
411 gcc_assert (kind == simple_return_rtx);
412 plabel = &function_simple_return_label;
415 /* If we found one previously, return it. */
416 if (*plabel)
417 return *plabel;
419 /* Otherwise, see if there is a label at the end of the function. If there
420 is, it must be that RETURN insns aren't needed, so that is our return
421 label and we don't have to do anything else. */
423 insn = get_last_insn ();
424 while (NOTE_P (insn)
425 || (NONJUMP_INSN_P (insn)
426 && (GET_CODE (PATTERN (insn)) == USE
427 || GET_CODE (PATTERN (insn)) == CLOBBER)))
428 insn = PREV_INSN (insn);
430 /* When a target threads its epilogue we might already have a
431 suitable return insn. If so put a label before it for the
432 function_return_label. */
433 if (BARRIER_P (insn)
434 && JUMP_P (PREV_INSN (insn))
435 && PATTERN (PREV_INSN (insn)) == kind)
437 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
438 rtx_code_label *label = gen_label_rtx ();
439 LABEL_NUSES (label) = 0;
441 /* Put the label before any USE insns that may precede the RETURN
442 insn. */
443 while (GET_CODE (temp) == USE)
444 temp = PREV_INSN (temp);
446 emit_label_after (label, temp);
447 *plabel = label;
450 else if (LABEL_P (insn))
451 *plabel = as_a <rtx_code_label *> (insn);
452 else
454 rtx_code_label *label = gen_label_rtx ();
455 LABEL_NUSES (label) = 0;
456 /* If the basic block reorder pass moves the return insn to
457 some other place try to locate it again and put our
458 function_return_label there. */
459 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
460 insn = PREV_INSN (insn);
461 if (insn)
463 insn = PREV_INSN (insn);
465 /* Put the label before any USE insns that may precede the
466 RETURN insn. */
467 while (GET_CODE (insn) == USE)
468 insn = PREV_INSN (insn);
470 emit_label_after (label, insn);
472 else
474 #ifdef HAVE_epilogue
475 if (HAVE_epilogue
476 #ifdef HAVE_return
477 && ! HAVE_return
478 #endif
480 /* The RETURN insn has its delay slot filled so we cannot
481 emit the label just before it. Since we already have
482 an epilogue and cannot emit a new RETURN, we cannot
483 emit the label at all. */
484 return NULL;
485 #endif /* HAVE_epilogue */
487 /* Otherwise, make a new label and emit a RETURN and BARRIER,
488 if needed. */
489 emit_label (label);
490 #ifdef HAVE_return
491 if (HAVE_return)
493 /* The return we make may have delay slots too. */
494 rtx pat = gen_return ();
495 rtx_insn *insn = emit_jump_insn (pat);
496 set_return_jump_label (insn);
497 emit_barrier ();
498 if (num_delay_slots (insn) > 0)
499 obstack_ptr_grow (&unfilled_slots_obstack, insn);
501 #endif
503 *plabel = label;
506 /* Show one additional use for this label so it won't go away until
507 we are done. */
508 ++LABEL_NUSES (*plabel);
510 return *plabel;
513 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
514 the pattern of INSN with the SEQUENCE.
516 Returns the insn containing the SEQUENCE that replaces INSN. */
518 static rtx_insn *
519 emit_delay_sequence (rtx_insn *insn, rtx_insn_list *list, int length)
521 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
522 rtvec seqv = rtvec_alloc (length + 1);
523 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
524 rtx_insn *seq_insn = make_insn_raw (seq);
526 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
527 not have a location, but one of the delayed insns does, we pick up a
528 location from there later. */
529 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
531 /* Unlink INSN from the insn chain, so that we can put it into
532 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
533 rtx after = PREV_INSN (insn);
534 remove_insn (insn);
535 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
537 /* Build our SEQUENCE and rebuild the insn chain. */
538 int i = 1;
539 start_sequence ();
540 XVECEXP (seq, 0, 0) = emit_insn (insn);
541 for (rtx_insn_list *li = list; li; li = li->next (), i++)
543 rtx_insn *tem = li->insn ();
544 rtx note, next;
546 /* Show that this copy of the insn isn't deleted. */
547 tem->set_undeleted ();
549 /* Unlink insn from its original place, and re-emit it into
550 the sequence. */
551 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
552 XVECEXP (seq, 0, i) = emit_insn (tem);
554 /* SPARC assembler, for instance, emit warning when debug info is output
555 into the delay slot. */
556 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
557 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
558 INSN_LOCATION (tem) = 0;
560 for (note = REG_NOTES (tem); note; note = next)
562 next = XEXP (note, 1);
563 switch (REG_NOTE_KIND (note))
565 case REG_DEAD:
566 /* Remove any REG_DEAD notes because we can't rely on them now
567 that the insn has been moved. */
568 remove_note (tem, note);
569 break;
571 case REG_LABEL_OPERAND:
572 case REG_LABEL_TARGET:
573 /* Keep the label reference count up to date. */
574 if (LABEL_P (XEXP (note, 0)))
575 LABEL_NUSES (XEXP (note, 0)) ++;
576 break;
578 default:
579 break;
583 end_sequence ();
584 gcc_assert (i == length + 1);
586 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
587 add_insn_after (seq_insn, after, NULL);
589 return seq_insn;
592 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
593 be in the order in which the insns are to be executed. */
595 static rtx_insn_list *
596 add_to_delay_list (rtx_insn *insn, rtx_insn_list *delay_list)
598 /* If we have an empty list, just make a new list element. If
599 INSN has its block number recorded, clear it since we may
600 be moving the insn to a new block. */
602 if (delay_list == 0)
604 clear_hashed_info_for_insn (insn);
605 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
608 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
609 list. */
610 XEXP (delay_list, 1) = add_to_delay_list (insn, delay_list->next ());
612 return delay_list;
615 /* Delete INSN from the delay slot of the insn that it is in, which may
616 produce an insn with no delay slots. Return the new insn. */
618 static rtx_insn *
619 delete_from_delay_slot (rtx_insn *insn)
621 rtx_insn *trial, *seq_insn, *prev;
622 rtx_sequence *seq;
623 rtx_insn_list *delay_list = 0;
624 int i;
625 int had_barrier = 0;
627 /* We first must find the insn containing the SEQUENCE with INSN in its
628 delay slot. Do this by finding an insn, TRIAL, where
629 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
631 for (trial = insn;
632 PREV_INSN (NEXT_INSN (trial)) == trial;
633 trial = NEXT_INSN (trial))
636 seq_insn = PREV_INSN (NEXT_INSN (trial));
637 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
639 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
640 had_barrier = 1;
642 /* Create a delay list consisting of all the insns other than the one
643 we are deleting (unless we were the only one). */
644 if (seq->len () > 2)
645 for (i = 1; i < seq->len (); i++)
646 if (seq->insn (i) != insn)
647 delay_list = add_to_delay_list (seq->insn (i), delay_list);
649 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
650 list, and rebuild the delay list if non-empty. */
651 prev = PREV_INSN (seq_insn);
652 trial = seq->insn (0);
653 delete_related_insns (seq_insn);
654 add_insn_after (trial, prev, NULL);
656 /* If there was a barrier after the old SEQUENCE, remit it. */
657 if (had_barrier)
658 emit_barrier_after (trial);
660 /* If there are any delay insns, remit them. Otherwise clear the
661 annul flag. */
662 if (delay_list)
663 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
664 else if (JUMP_P (trial))
665 INSN_ANNULLED_BRANCH_P (trial) = 0;
667 INSN_FROM_TARGET_P (insn) = 0;
669 /* Show we need to fill this insn again. */
670 obstack_ptr_grow (&unfilled_slots_obstack, trial);
672 return trial;
675 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
676 the insn that sets CC0 for it and delete it too. */
678 static void
679 delete_scheduled_jump (rtx_insn *insn)
681 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
682 delete the insn that sets the condition code, but it is hard to find it.
683 Since this case is rare anyway, don't bother trying; there would likely
684 be other insns that became dead anyway, which we wouldn't know to
685 delete. */
687 #ifdef HAVE_cc0
688 if (reg_mentioned_p (cc0_rtx, insn))
690 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
692 /* If a reg-note was found, it points to an insn to set CC0. This
693 insn is in the delay list of some other insn. So delete it from
694 the delay list it was in. */
695 if (note)
697 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
698 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
699 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
701 else
703 /* The insn setting CC0 is our previous insn, but it may be in
704 a delay slot. It will be the last insn in the delay slot, if
705 it is. */
706 rtx_insn *trial = previous_insn (insn);
707 if (NOTE_P (trial))
708 trial = prev_nonnote_insn (trial);
709 if (sets_cc0_p (PATTERN (trial)) != 1
710 || FIND_REG_INC_NOTE (trial, NULL_RTX))
711 return;
712 if (PREV_INSN (NEXT_INSN (trial)) == trial)
713 delete_related_insns (trial);
714 else
715 delete_from_delay_slot (trial);
718 #endif
720 delete_related_insns (insn);
723 /* Counters for delay-slot filling. */
725 #define NUM_REORG_FUNCTIONS 2
726 #define MAX_DELAY_HISTOGRAM 3
727 #define MAX_REORG_PASSES 2
729 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
731 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
733 static int reorg_pass_number;
735 static void
736 note_delay_statistics (int slots_filled, int index)
738 num_insns_needing_delays[index][reorg_pass_number]++;
739 if (slots_filled > MAX_DELAY_HISTOGRAM)
740 slots_filled = MAX_DELAY_HISTOGRAM;
741 num_filled_delays[index][slots_filled][reorg_pass_number]++;
744 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
746 /* Optimize the following cases:
748 1. When a conditional branch skips over only one instruction,
749 use an annulling branch and put that insn in the delay slot.
750 Use either a branch that annuls when the condition if true or
751 invert the test with a branch that annuls when the condition is
752 false. This saves insns, since otherwise we must copy an insn
753 from the L1 target.
755 (orig) (skip) (otherwise)
756 Bcc.n L1 Bcc',a L1 Bcc,a L1'
757 insn insn insn2
758 L1: L1: L1:
759 insn2 insn2 insn2
760 insn3 insn3 L1':
761 insn3
763 2. When a conditional branch skips over only one instruction,
764 and after that, it unconditionally branches somewhere else,
765 perform the similar optimization. This saves executing the
766 second branch in the case where the inverted condition is true.
768 Bcc.n L1 Bcc',a L2
769 insn insn
770 L1: L1:
771 Bra L2 Bra L2
773 INSN is a JUMP_INSN.
775 This should be expanded to skip over N insns, where N is the number
776 of delay slots required. */
778 static rtx_insn_list *
779 optimize_skip (rtx_insn *insn)
781 rtx_insn *trial = next_nonnote_insn (insn);
782 rtx_insn *next_trial = next_active_insn (trial);
783 rtx_insn_list *delay_list = 0;
784 int flags;
786 flags = get_jump_flags (insn, JUMP_LABEL (insn));
788 if (trial == 0
789 || !NONJUMP_INSN_P (trial)
790 || GET_CODE (PATTERN (trial)) == SEQUENCE
791 || recog_memoized (trial) < 0
792 || (! eligible_for_annul_false (insn, 0, trial, flags)
793 && ! eligible_for_annul_true (insn, 0, trial, flags))
794 || can_throw_internal (trial))
795 return 0;
797 /* There are two cases where we are just executing one insn (we assume
798 here that a branch requires only one insn; this should be generalized
799 at some point): Where the branch goes around a single insn or where
800 we have one insn followed by a branch to the same label we branch to.
801 In both of these cases, inverting the jump and annulling the delay
802 slot give the same effect in fewer insns. */
803 if (next_trial == next_active_insn (JUMP_LABEL (insn))
804 || (next_trial != 0
805 && simplejump_or_return_p (next_trial)
806 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
808 if (eligible_for_annul_false (insn, 0, trial, flags))
810 if (invert_jump (insn, JUMP_LABEL (insn), 1))
811 INSN_FROM_TARGET_P (trial) = 1;
812 else if (! eligible_for_annul_true (insn, 0, trial, flags))
813 return 0;
816 delay_list = add_to_delay_list (trial, NULL);
817 next_trial = next_active_insn (trial);
818 update_block (trial, trial);
819 delete_related_insns (trial);
821 /* Also, if we are targeting an unconditional
822 branch, thread our jump to the target of that branch. Don't
823 change this into a RETURN here, because it may not accept what
824 we have in the delay slot. We'll fix this up later. */
825 if (next_trial && simplejump_or_return_p (next_trial))
827 rtx target_label = JUMP_LABEL (next_trial);
828 if (ANY_RETURN_P (target_label))
829 target_label = find_end_label (target_label);
831 if (target_label)
833 /* Recompute the flags based on TARGET_LABEL since threading
834 the jump to TARGET_LABEL may change the direction of the
835 jump (which may change the circumstances in which the
836 delay slot is nullified). */
837 flags = get_jump_flags (insn, target_label);
838 if (eligible_for_annul_true (insn, 0, trial, flags))
839 reorg_redirect_jump (insn, target_label);
843 INSN_ANNULLED_BRANCH_P (insn) = 1;
846 return delay_list;
848 #endif
850 /* Encode and return branch direction and prediction information for
851 INSN assuming it will jump to LABEL.
853 Non conditional branches return no direction information and
854 are predicted as very likely taken. */
856 static int
857 get_jump_flags (const rtx_insn *insn, rtx label)
859 int flags;
861 /* get_jump_flags can be passed any insn with delay slots, these may
862 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
863 direction information, and only if they are conditional jumps.
865 If LABEL is a return, then there is no way to determine the branch
866 direction. */
867 if (JUMP_P (insn)
868 && (condjump_p (insn) || condjump_in_parallel_p (insn))
869 && !ANY_RETURN_P (label)
870 && INSN_UID (insn) <= max_uid
871 && INSN_UID (label) <= max_uid)
872 flags
873 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
874 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
875 /* No valid direction information. */
876 else
877 flags = 0;
879 return flags;
882 /* Return truth value of the statement that this branch
883 is mostly taken. If we think that the branch is extremely likely
884 to be taken, we return 2. If the branch is slightly more likely to be
885 taken, return 1. If the branch is slightly less likely to be taken,
886 return 0 and if the branch is highly unlikely to be taken, return -1. */
888 static int
889 mostly_true_jump (rtx jump_insn)
891 /* If branch probabilities are available, then use that number since it
892 always gives a correct answer. */
893 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
894 if (note)
896 int prob = XINT (note, 0);
898 if (prob >= REG_BR_PROB_BASE * 9 / 10)
899 return 2;
900 else if (prob >= REG_BR_PROB_BASE / 2)
901 return 1;
902 else if (prob >= REG_BR_PROB_BASE / 10)
903 return 0;
904 else
905 return -1;
908 /* If there is no note, assume branches are not taken.
909 This should be rare. */
910 return 0;
/* Return the condition under which INSN will branch to TARGET.  If TARGET
   is zero, return the condition under which INSN will return.  If INSN is
   an unconditional branch, return const_true_rtx.  If INSN isn't a simple
   type of jump, or it doesn't go to TARGET, return 0.  */

static rtx
get_branch_condition (const rtx_insn *insn, rtx target)
{
  rtx pat = PATTERN (insn);
  rtx src;

  /* For a conditional jump wrapped in a PARALLEL, the branch proper is
     the first element.  */
  if (condjump_in_parallel_p (insn))
    pat = XVECEXP (pat, 0, 0);

  /* A bare (return) or (simple_return) pattern: unconditional if it is
     the requested target.  */
  if (ANY_RETURN_P (pat) && pat == target)
    return const_true_rtx;

  /* Anything other than a single SET of the PC is not a simple jump.  */
  if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
    return 0;

  src = SET_SRC (pat);
  /* Unconditional jump directly to TARGET.  */
  if (GET_CODE (src) == LABEL_REF && LABEL_REF_LABEL (src) == target)
    return const_true_rtx;

  /* Conditional jump where the "then" arm goes to TARGET (a label or a
     return) and the "else" arm falls through: the condition as written.  */
  else if (GET_CODE (src) == IF_THEN_ELSE
	   && XEXP (src, 2) == pc_rtx
	   && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
		&& LABEL_REF_LABEL (XEXP (src, 1)) == target)
	       || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
    return XEXP (src, 0);

  /* Conditional jump where the "else" arm goes to TARGET and the "then"
     arm falls through: return the reversed condition, when the reversal
     is known to be valid.  */
  else if (GET_CODE (src) == IF_THEN_ELSE
	   && XEXP (src, 1) == pc_rtx
	   && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
		&& LABEL_REF_LABEL (XEXP (src, 2)) == target)
	       || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
    {
      enum rtx_code rev;
      rev = reversed_comparison_code (XEXP (src, 0), insn);
      if (rev != UNKNOWN)
	return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
			       XEXP (XEXP (src, 0), 0),
			       XEXP (XEXP (src, 0), 1));
    }

  return 0;
}
961 /* Return nonzero if CONDITION is more strict than the condition of
962 INSN, i.e., if INSN will always branch if CONDITION is true. */
964 static int
965 condition_dominates_p (rtx condition, const rtx_insn *insn)
967 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
968 enum rtx_code code = GET_CODE (condition);
969 enum rtx_code other_code;
971 if (rtx_equal_p (condition, other_condition)
972 || other_condition == const_true_rtx)
973 return 1;
975 else if (condition == const_true_rtx || other_condition == 0)
976 return 0;
978 other_code = GET_CODE (other_condition);
979 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
980 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
981 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
982 return 0;
984 return comparison_dominates_p (code, other_code);
987 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
988 any insns already in the delay slot of JUMP. */
990 static int
991 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
993 int flags, i;
994 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
996 /* Make sure all the delay slots of this jump would still
997 be valid after threading the jump. If they are still
998 valid, then return nonzero. */
1000 flags = get_jump_flags (jump, newlabel);
1001 for (i = 1; i < pat->len (); i++)
1002 if (! (
1003 #ifdef ANNUL_IFFALSE_SLOTS
1004 (INSN_ANNULLED_BRANCH_P (jump)
1005 && INSN_FROM_TARGET_P (pat->insn (i)))
1006 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
1007 #endif
1008 #ifdef ANNUL_IFTRUE_SLOTS
1009 (INSN_ANNULLED_BRANCH_P (jump)
1010 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1011 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
1012 #endif
1013 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
1014 break;
1016 return (i == pat->len ());
1019 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
1020 any insns we wish to place in the delay slot of JUMP. */
1022 static int
1023 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
1024 rtx_insn_list *delay_list)
1026 int flags, i;
1027 rtx_insn_list *li;
1029 /* Make sure all the insns in DELAY_LIST would still be
1030 valid after threading the jump. If they are still
1031 valid, then return nonzero. */
1033 flags = get_jump_flags (jump, newlabel);
1034 for (li = delay_list, i = 0; li; li = li->next (), i++)
1035 if (! (
1036 #ifdef ANNUL_IFFALSE_SLOTS
1037 (INSN_ANNULLED_BRANCH_P (jump)
1038 && INSN_FROM_TARGET_P (li->insn ()))
1039 ? eligible_for_annul_false (jump, i, li->insn (), flags) :
1040 #endif
1041 #ifdef ANNUL_IFTRUE_SLOTS
1042 (INSN_ANNULLED_BRANCH_P (jump)
1043 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
1044 ? eligible_for_annul_true (jump, i, li->insn (), flags) :
1045 #endif
1046 eligible_for_delay (jump, i, li->insn (), flags)))
1047 break;
1049 return (li == NULL);
1052 /* DELAY_LIST is a list of insns that have already been placed into delay
1053 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1054 If not, return 0; otherwise return 1. */
1056 static int
1057 check_annul_list_true_false (int annul_true_p, rtx delay_list)
1059 rtx temp;
1061 if (delay_list)
1063 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1065 rtx trial = XEXP (temp, 0);
1067 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1068 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1069 return 0;
1073 return 1;
/* INSN branches to an insn whose pattern SEQ is a SEQUENCE.  Given that
   the condition tested by INSN is CONDITION and the resources shown in
   OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
   from SEQ's delay list, in addition to whatever insns it may execute
   (in DELAY_LIST).  SETS and NEEDED are denote resources already set and
   needed while searching for delay slot insns.  Return the concatenated
   delay list if possible, otherwise, return 0.

   SLOTS_TO_FILL is the total number of slots required by INSN, and
   PSLOTS_FILLED points to the number filled so far (also the number of
   insns in DELAY_LIST).  It is updated with the number that have been
   filled from the SEQUENCE, if any.

   PANNUL_P points to a nonzero value if we already know that we need
   to annul INSN.  If this routine determines that annulling is needed,
   it may set that value nonzero.

   PNEW_THREAD points to a location that is to receive the place at which
   execution should continue.  */

static rtx_insn_list *
steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
			      rtx_insn_list *delay_list, struct resources *sets,
			      struct resources *needed,
			      struct resources *other_needed,
			      int slots_to_fill, int *pslots_filled,
			      int *pannul_p, rtx *pnew_thread)
{
  int slots_remaining = slots_to_fill - *pslots_filled;
  int total_slots_filled = *pslots_filled;
  rtx_insn_list *new_delay_list = 0;
  int must_annul = *pannul_p;
  int used_annul = 0;
  int i;
  struct resources cc_set;
  bool *redundant;

  /* We can't do anything if there are more delay slots in SEQ than we
     can handle, or if we don't know that it will be a taken branch.
     We know that it will be a taken branch if it is either an unconditional
     branch or a conditional branch with a stricter branch condition.

     Also, exit if the branch has more than one set, since then it is computing
     other results that can't be ignored, e.g. the HPPA mov&branch instruction.
     ??? It may be possible to move other sets into INSN in addition to
     moving the instructions in the delay slots.

     We can not steal the delay list if one of the instructions in the
     current delay_list modifies the condition codes and the jump in the
     sequence is a conditional jump.  We can not do this because we can
     not change the direction of the jump because the condition codes
     will effect the direction of the jump in the sequence.  */

  CLEAR_RESOURCE (&cc_set);
  for (rtx_insn_list *temp = delay_list; temp; temp = temp->next ())
    {
      rtx_insn *trial = temp->insn ();

      /* Accumulate everything set by the insns already in DELAY_LIST;
	 if the branch at the head of SEQ reads any of it, stealing
	 would change the branch's behavior.  */
      mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
      if (insn_references_resource_p (seq->insn (0), &cc_set, false))
	return delay_list;
    }

  if (XVECLEN (seq, 0) - 1 > slots_remaining
      || ! condition_dominates_p (condition, seq->insn (0))
      || ! single_set (seq->insn (0)))
    return delay_list;

#ifdef MD_CAN_REDIRECT_BRANCH
  /* On some targets, branches with delay slots can have a limited
     displacement.  Give the back end a chance to tell us we can't do
     this.  */
  if (! MD_CAN_REDIRECT_BRANCH (insn, seq->insn (0)))
    return delay_list;
#endif

  /* Per-slot flags recording which of SEQ's delay insns turned out to
     be redundant; slot 0 (the branch itself) is unused.  */
  redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
  for (i = 1; i < seq->len (); i++)
    {
      rtx_insn *trial = seq->insn (i);
      int flags;

      if (insn_references_resource_p (trial, sets, false)
	  || insn_sets_resource_p (trial, needed, false)
	  || insn_sets_resource_p (trial, sets, false)
#ifdef HAVE_cc0
	  /* If TRIAL sets CC0, we can't copy it, so we can't steal this
	     delay list.  */
	  || find_reg_note (trial, REG_CC_USER, NULL_RTX)
#endif
	  /* If TRIAL is from the fallthrough code of an annulled branch insn
	     in SEQ, we cannot use it.  */
	  || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
	      && ! INSN_FROM_TARGET_P (trial)))
	return delay_list;

      /* If this insn was already done (usually in a previous delay slot),
	 pretend we put it in our delay slot.  */
      redundant[i] = redundant_insn (trial, insn, new_delay_list);
      if (redundant[i])
	continue;

      /* We will end up re-vectoring this branch, so compute flags
	 based on jumping to the new label.  */
      flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));

      /* Either TRIAL is eligible as a plain delay insn, or we fall back
	 to annulling.  Note the comma expression: MUST_ANNUL is set
	 before the annul eligibility tests are evaluated, committing us
	 to annulling if this arm is taken.  */
      if (! must_annul
	  && ((condition == const_true_rtx
	       || (! insn_sets_resource_p (trial, other_needed, false)
		   && ! may_trap_or_fault_p (PATTERN (trial)))))
	  ? eligible_for_delay (insn, total_slots_filled, trial, flags)
	  : (must_annul || (delay_list == NULL && new_delay_list == NULL))
	    && (must_annul = 1,
		check_annul_list_true_false (0, delay_list)
		&& check_annul_list_true_false (0, new_delay_list)
		&& eligible_for_annul_false (insn, total_slots_filled,
					     trial, flags)))
	{
	  if (must_annul)
	    used_annul = 1;
	  /* Copy TRIAL rather than moving it: the original stays in SEQ's
	     delay slot; our copy is marked as coming from the target.  */
	  rtx_insn *temp = copy_delay_slot_insn (trial);
	  INSN_FROM_TARGET_P (temp) = 1;
	  new_delay_list = add_to_delay_list (temp, new_delay_list);
	  total_slots_filled++;

	  if (--slots_remaining == 0)
	    break;
	}
      else
	return delay_list;
    }

  /* Record the effect of the instructions that were redundant and which
     we therefore decided not to copy.  */
  for (i = 1; i < seq->len (); i++)
    if (redundant[i])
      update_block (seq->insn (i), insn);

  /* Show the place to which we will be branching.  */
  *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));

  /* Add any new insns to the delay list and update the count of the
     number of slots filled.  */
  *pslots_filled = total_slots_filled;
  if (used_annul)
    *pannul_p = 1;

  if (delay_list == 0)
    return new_delay_list;

  for (rtx_insn_list *temp = new_delay_list; temp; temp = temp->next ())
    delay_list = add_to_delay_list (temp->insn (), delay_list);

  return delay_list;
}
/* Similar to steal_delay_list_from_target except that SEQ is on the
   fallthrough path of INSN.  Here we only do something if the delay insn
   of SEQ is an unconditional branch.  In that case we steal its delay slot
   for INSN since unconditional branches are much easier to fill.  */

static rtx_insn_list *
steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
				   rtx_sequence *seq,
				   rtx_insn_list *delay_list,
				   struct resources *sets,
				   struct resources *needed,
				   struct resources *other_needed,
				   int slots_to_fill, int *pslots_filled,
				   int *pannul_p)
{
  int i;
  int flags;
  int must_annul = *pannul_p;
  int used_annul = 0;

  flags = get_jump_flags (insn, JUMP_LABEL (insn));

  /* We can't do anything if SEQ's delay insn isn't an
     unconditional branch.  */

  if (! simplejump_or_return_p (seq->insn (0)))
    return delay_list;

  for (i = 1; i < seq->len (); i++)
    {
      rtx_insn *trial = seq->insn (i);

      /* If TRIAL sets CC0, stealing it will move it too far from the use
	 of CC0.  */
      if (insn_references_resource_p (trial, sets, false)
	  || insn_sets_resource_p (trial, needed, false)
	  || insn_sets_resource_p (trial, sets, false)
#ifdef HAVE_cc0
	  || sets_cc0_p (PATTERN (trial))
#endif
	  )
	break;

      /* If this insn was already done, we don't need it.  */
      if (redundant_insn (trial, insn, delay_list))
	{
	  update_block (trial, insn);
	  delete_from_delay_slot (trial);
	  continue;
	}

      /* Either TRIAL is eligible as a plain delay insn, or we fall back
	 to annulling.  Note the comma expression: MUST_ANNUL is set
	 before the annul eligibility test is evaluated.  */
      if (! must_annul
	  && ((condition == const_true_rtx
	       || (! insn_sets_resource_p (trial, other_needed, false)
		   && ! may_trap_or_fault_p (PATTERN (trial)))))
	  ? eligible_for_delay (insn, *pslots_filled, trial, flags)
	  : (must_annul || delay_list == NULL) && (must_annul = 1,
	     check_annul_list_true_false (1, delay_list)
	     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
	{
	  if (must_annul)
	    used_annul = 1;
	  /* Unlike the steal-from-target case, the insn is moved (not
	     copied) out of SEQ's delay slot into our list.  */
	  delete_from_delay_slot (trial);
	  delay_list = add_to_delay_list (trial, delay_list);

	  if (++(*pslots_filled) == slots_to_fill)
	    break;
	}
      else
	break;
    }

  if (used_annul)
    *pannul_p = 1;
  return delay_list;
}
/* Try merging insns starting at THREAD which match exactly the insns in
   INSN's delay list.

   If all insns were matched and the insn was previously annulling, the
   annul bit will be cleared.

   For each insn that is merged, if the branch is or will be non-annulling,
   we delete the merged insn.  */

static void
try_merge_delay_insns (rtx insn, rtx_insn *thread)
{
  rtx_insn *trial, *next_trial;
  rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
  int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
  int slot_number = 1;
  int num_slots = XVECLEN (PATTERN (insn), 0);
  rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
  struct resources set, needed;
  /* List of insns matched while ANNUL_P; the mode of each list node is
     abused as a flag: SImode marks an insn merged out of another insn's
     delay slot, VOIDmode one merged from the straight-line thread.  */
  rtx_insn_list *merged_insns = 0;
  int i;
  int flags;

  flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));

  CLEAR_RESOURCE (&needed);
  CLEAR_RESOURCE (&set);

  /* If this is not an annulling branch, take into account anything needed in
     INSN's delay slot.  This prevents two increments from being incorrectly
     folded into one.  If we are annulling, this would be the correct
     thing to do.  (The alternative, looking at things set in NEXT_TO_MATCH
     will essentially disable this optimization.  This method is somewhat of
     a kludge, but I don't see a better way.)  */
  if (! annul_p)
    for (i = 1 ; i < num_slots; i++)
      if (XVECEXP (PATTERN (insn), 0, i))
	mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
				   true);

  for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
    {
      rtx pat = PATTERN (trial);
      rtx oldtrial = trial;

      next_trial = next_nonnote_insn (trial);

      /* TRIAL must be a CALL_INSN or INSN.  Skip USE and CLOBBER.  */
      if (NONJUMP_INSN_P (trial)
	  && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
	continue;

      if (GET_CODE (next_to_match) == GET_CODE (trial)
#ifdef HAVE_cc0
	  /* We can't share an insn that sets cc0.  */
	  && ! sets_cc0_p (pat)
#endif
	  && ! insn_references_resource_p (trial, &set, true)
	  && ! insn_sets_resource_p (trial, &set, true)
	  && ! insn_sets_resource_p (trial, &needed, true)
	  && (trial = try_split (pat, trial, 0)) != 0
	  /* Update next_trial, in case try_split succeeded.  */
	  && (next_trial = next_nonnote_insn (trial))
	  /* Likewise THREAD.  */
	  && (thread = oldtrial == thread ? trial : thread)
	  && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
	  /* Have to test this condition if annul condition is different
	     from (and less restrictive than) non-annulling one.  */
	  && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
	{
	  if (! annul_p)
	    {
	      /* Non-annulling: the slot copy will execute, so the
		 matched thread insn is simply deleted.  */
	      update_block (trial, thread);
	      if (trial == thread)
		thread = next_active_insn (thread);

	      delete_related_insns (trial);
	      INSN_FROM_TARGET_P (next_to_match) = 0;
	    }
	  else
	    /* Annulling: remember the match; deletion is deferred until
	       we know every slot matched.  */
	    merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);

	  if (++slot_number == num_slots)
	    break;

	  next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
	}

      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
      mark_referenced_resources (trial, &needed, true);
    }

  /* See if we stopped on a filled insn.  If we did, try to see if its
     delay slots match.  */
  if (slot_number != num_slots
      && trial && NONJUMP_INSN_P (trial)
      && GET_CODE (PATTERN (trial)) == SEQUENCE
      && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
	   && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
    {
      rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
      rtx filled_insn = XVECEXP (pat, 0, 0);

      /* Account for resources set/needed by the filled insn.  */
      mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
      mark_referenced_resources (filled_insn, &needed, true);

      for (i = 1; i < pat->len (); i++)
	{
	  rtx_insn *dtrial = pat->insn (i);

	  if (! insn_references_resource_p (dtrial, &set, true)
	      && ! insn_sets_resource_p (dtrial, &set, true)
	      && ! insn_sets_resource_p (dtrial, &needed, true)
#ifdef HAVE_cc0
	      && ! sets_cc0_p (PATTERN (dtrial))
#endif
	      && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
	      && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
	    {
	      if (! annul_p)
		{
		  rtx_insn *new_rtx;

		  update_block (dtrial, thread);
		  new_rtx = delete_from_delay_slot (dtrial);
		  if (thread->deleted ())
		    thread = new_rtx;
		  INSN_FROM_TARGET_P (next_to_match) = 0;
		}
	      else
		/* SImode flags a match that lives in a delay slot, so the
		   cleanup loop below knows to use delete_from_delay_slot.  */
		merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
						  merged_insns);

	      if (++slot_number == num_slots)
		break;

	      next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
	    }
	  else
	    {
	      /* Keep track of the set/referenced resources for the delay
		 slots of any trial insns we encounter.  */
	      mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
	      mark_referenced_resources (dtrial, &needed, true);
	    }
	}
    }

  /* If all insns in the delay slot have been matched and we were previously
     annulling the branch, we need not any more.  In that case delete all the
     merged insns.  Also clear the INSN_FROM_TARGET_P bit of each insn in
     the delay list so that we know that it isn't only being used at the
     target.  */
  if (slot_number == num_slots && annul_p)
    {
      for (; merged_insns; merged_insns = merged_insns->next ())
	{
	  if (GET_MODE (merged_insns) == SImode)
	    {
	      rtx_insn *new_rtx;

	      update_block (merged_insns->insn (), thread);
	      new_rtx = delete_from_delay_slot (merged_insns->insn ());
	      if (thread->deleted ())
		thread = new_rtx;
	    }
	  else
	    {
	      update_block (merged_insns->insn (), thread);
	      delete_related_insns (merged_insns->insn ());
	    }
	}

      INSN_ANNULLED_BRANCH_P (delay_insn) = 0;

      for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
	INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
    }
}
/* See if INSN is redundant with an insn in front of TARGET.  Often this
   is called when INSN is a candidate for a delay slot of TARGET.
   DELAY_LIST are insns that will be placed in delay slots of TARGET in front
   of INSN.  Often INSN will be redundant with an insn in a delay slot of
   some previous insn.  This happens when we have a series of branches to the
   same label; in that case the first insn at the target might want to go
   into each of the delay slots.

   If we are not careful, this routine can take up a significant fraction
   of the total compilation time (4%), but only wins rarely.  Hence we
   speed this routine up by making two passes.  The first pass goes back
   until it hits a label and sees if it finds an insn with an identical
   pattern.  Only in this (relatively rare) event does it check for
   data conflicts.

   We do not split insns we encounter.  This could cause us not to find a
   redundant insn, but the cost of splitting seems greater than the possible
   gain in rare cases.  */

static rtx
redundant_insn (rtx insn, rtx_insn *target, rtx delay_list)
{
  rtx target_main = target;
  rtx ipat = PATTERN (insn);
  rtx_insn *trial;
  rtx pat;
  struct resources needed, set;
  int i;
  unsigned insns_to_search;

  /* If INSN has any REG_UNUSED notes, it can't match anything since we
     are allowed to not actually assign to such a register.  */
  if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
    return 0;

  /* First pass: scan backwards looking for a pattern match only,
     bounded by MAX_DELAY_SLOT_INSN_SEARCH real insns.  */
  for (trial = PREV_INSN (target),
	 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
       trial && insns_to_search > 0;
       trial = PREV_INSN (trial))
    {
      /* (use (insn))s can come immediately after a barrier if the
	 label that used to precede them has been deleted as dead.
	 See delete_related_insns.  */
      if (LABEL_P (trial) || BARRIER_P (trial))
	return 0;

      if (!INSN_P (trial))
	continue;
      --insns_to_search;

      pat = PATTERN (trial);
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;

      if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
	{
	  /* Stop for a CALL and its delay slots because it is difficult to
	     track its resource needs correctly.  */
	  if (CALL_P (seq->element (0)))
	    return 0;

	  /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
	     slots because it is difficult to track its resource needs
	     correctly.  */

#ifdef INSN_SETS_ARE_DELAYED
	  if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
	    return 0;
#endif

#ifdef INSN_REFERENCES_ARE_DELAYED
	  if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
	    return 0;
#endif

	  /* See if any of the insns in the delay slot match, updating
	     resource requirements as we go.  */
	  for (i = seq->len () - 1; i > 0; i--)
	    if (GET_CODE (seq->element (i)) == GET_CODE (insn)
		&& rtx_equal_p (PATTERN (seq->element (i)), ipat)
		&& ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
	      break;

	  /* If found a match, exit this loop early.  */
	  if (i > 0)
	    break;
	}

      else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
	       && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
	break;
    }

  /* If we didn't find an insn that matches, return 0.  */
  if (trial == 0)
    return 0;

  /* See what resources this insn sets and needs.  If they overlap, or
     if this insn references CC0, it can't be redundant.  */

  CLEAR_RESOURCE (&needed);
  CLEAR_RESOURCE (&set);
  mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
  mark_referenced_resources (insn, &needed, true);

  /* If TARGET is a SEQUENCE, get the main insn.  */
  if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
    target_main = XVECEXP (PATTERN (target), 0, 0);

  if (resource_conflicts_p (&needed, &set)
#ifdef HAVE_cc0
      || reg_mentioned_p (cc0_rtx, ipat)
#endif
      /* The insn requiring the delay may not set anything needed or set by
	 INSN.  */
      || insn_sets_resource_p (target_main, &needed, true)
      || insn_sets_resource_p (target_main, &set, true))
    return 0;

  /* Insns we pass may not set either NEEDED or SET, so merge them for
     simpler tests.  */
  needed.memory |= set.memory;
  IOR_HARD_REG_SET (needed.regs, set.regs);

  /* This insn isn't redundant if it conflicts with an insn that either is
     or will be in a delay slot of TARGET.  */

  while (delay_list)
    {
      if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, true))
	return 0;
      delay_list = XEXP (delay_list, 1);
    }

  if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
    for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
      if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
				true))
	return 0;

  /* Second pass: scan backwards until we reach a label or an insn that
     uses something INSN sets or sets something insn uses or sets.  */

  for (trial = PREV_INSN (target),
	 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
       trial && !LABEL_P (trial) && insns_to_search > 0;
       trial = PREV_INSN (trial))
    {
      if (!INSN_P (trial))
	continue;
      --insns_to_search;

      pat = PATTERN (trial);
      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
	continue;

      if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
	{
	  bool annul_p = false;
	  rtx_insn *control = seq->insn (0);

	  /* If this is a CALL_INSN and its delay slots, it is hard to track
	     the resource needs properly, so give up.  */
	  if (CALL_P (control))
	    return 0;

	  /* If this is an INSN or JUMP_INSN with delayed effects, it
	     is hard to track the resource needs properly, so give up.  */

#ifdef INSN_SETS_ARE_DELAYED
	  if (INSN_SETS_ARE_DELAYED (control))
	    return 0;
#endif

#ifdef INSN_REFERENCES_ARE_DELAYED
	  if (INSN_REFERENCES_ARE_DELAYED (control))
	    return 0;
#endif

	  if (JUMP_P (control))
	    annul_p = INSN_ANNULLED_BRANCH_P (control);

	  /* See if any of the insns in the delay slot match, updating
	     resource requirements as we go.  */
	  for (i = seq->len () - 1; i > 0; i--)
	    {
	      rtx candidate = seq->element (i);

	      /* If an insn will be annulled if the branch is false, it isn't
		 considered as a possible duplicate insn.  */
	      if (rtx_equal_p (PATTERN (candidate), ipat)
		  && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
		{
		  /* Show that this insn will be used in the sequel.  */
		  INSN_FROM_TARGET_P (candidate) = 0;
		  return candidate;
		}

	      /* Unless this is an annulled insn from the target of a branch,
		 we must stop if it sets anything needed or set by INSN.  */
	      if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
		  && insn_sets_resource_p (candidate, &needed, true))
		return 0;
	    }

	  /* If the insn requiring the delay slot conflicts with INSN, we
	     must stop.  */
	  if (insn_sets_resource_p (control, &needed, true))
	    return 0;
	}
      else
	{
	  /* See if TRIAL is the same as INSN.  */
	  pat = PATTERN (trial);
	  if (rtx_equal_p (pat, ipat))
	    return trial;

	  /* Can't go any further if TRIAL conflicts with INSN.  */
	  if (insn_sets_resource_p (trial, &needed, true))
	    return 0;
	}
    }

  return 0;
}
/* Return 1 if THREAD can only be executed in one way.  If LABEL is nonzero,
   it is the target of the branch insn being scanned.  If ALLOW_FALLTHROUGH
   is nonzero, we are allowed to fall into this thread; otherwise, we are
   not.

   If LABEL is used more than once or we pass a label other than LABEL before
   finding an active insn, we do not own this thread.  */

static int
own_thread_p (rtx thread, rtx label, int allow_fallthrough)
{
  rtx_insn *active_insn;
  rtx_insn *insn;

  /* We don't own the function end.  */
  if (thread == 0 || ANY_RETURN_P (thread))
    return 0;

  /* We have a non-NULL insn.  */
  rtx_insn *thread_insn = as_a <rtx_insn *> (thread);

  /* Get the first active insn, or THREAD_INSN, if it is an active insn.  */
  active_insn = next_active_insn (PREV_INSN (thread_insn));

  /* Any label between THREAD and the first active insn other than a
     single-use LABEL means control can enter from elsewhere.  */
  for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
    if (LABEL_P (insn)
	&& (insn != label || LABEL_NUSES (insn) != 1))
      return 0;

  if (allow_fallthrough)
    return 1;

  /* Ensure that we reach a BARRIER before any insn or label.
     Note the loop condition deliberately keeps iterating on INSN == 0
     so the body can reject running off the start of the function.  */
  for (insn = prev_nonnote_insn (thread_insn);
       insn == 0 || !BARRIER_P (insn);
       insn = prev_nonnote_insn (insn))
    if (insn == 0
	|| LABEL_P (insn)
	|| (NONJUMP_INSN_P (insn)
	    && GET_CODE (PATTERN (insn)) != USE
	    && GET_CODE (PATTERN (insn)) != CLOBBER))
      return 0;

  return 1;
}
1765 /* Called when INSN is being moved from a location near the target of a jump.
1766 We leave a marker of the form (use (INSN)) immediately in front
1767 of WHERE for mark_target_live_regs. These markers will be deleted when
1768 reorg finishes.
1770 We used to try to update the live status of registers if WHERE is at
1771 the start of a basic block, but that can't work since we may remove a
1772 BARRIER in relax_delay_slots. */
1774 static void
1775 update_block (rtx_insn *insn, rtx where)
1777 /* Ignore if this was in a delay slot and it came from the target of
1778 a branch. */
1779 if (INSN_FROM_TARGET_P (insn))
1780 return;
1782 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1784 /* INSN might be making a value live in a block where it didn't use to
1785 be. So recompute liveness information for this block. */
1787 incr_ticks_for_insn (insn);
/* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
   the basic block containing the jump.  Returns the result of
   redirect_jump (nonzero on success).  */

static int
reorg_redirect_jump (rtx_insn *jump, rtx nlabel)
{
  /* Invalidate cached liveness for JUMP's block before the redirect.  */
  incr_ticks_for_insn (jump);
  return redirect_jump (jump, nlabel, 1);
}
/* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
   We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
   that reference values used in INSN.  If we find one, then we move the
   REG_DEAD note to INSN.

   This is needed to handle the case where a later insn (after INSN) has a
   REG_DEAD note for a register used by INSN, and this later insn subsequently
   gets moved before a CODE_LABEL because it is a redundant insn.  In this
   case, mark_target_live_regs may be confused into thinking the register
   is dead because it sees a REG_DEAD note immediately before a CODE_LABEL.  */

static void
update_reg_dead_notes (rtx insn, rtx delayed_insn)
{
  rtx p, link, next;

  for (p = next_nonnote_insn (insn); p != delayed_insn;
       p = next_nonnote_insn (p))
    for (link = REG_NOTES (p); link; link = next)
      {
	/* Fetch the successor first; LINK may be spliced out below.  */
	next = XEXP (link, 1);

	if (REG_NOTE_KIND (link) != REG_DEAD
	    || !REG_P (XEXP (link, 0)))
	  continue;

	if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
	  {
	    /* Move the REG_DEAD note from P to INSN by relinking the
	       note node itself onto INSN's note list.  */
	    remove_note (p, link);
	    XEXP (link, 1) = REG_NOTES (insn);
	    REG_NOTES (insn) = link;
	  }
      }
}
1836 /* Called when an insn redundant with start_insn is deleted. If there
1837 is a REG_DEAD note for the target of start_insn between start_insn
1838 and stop_insn, then the REG_DEAD note needs to be deleted since the
1839 value no longer dies there.
1841 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1842 confused into thinking the register is dead. */
1844 static void
1845 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1847 rtx p, link, next;
1849 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1850 p = next_nonnote_insn (p))
1851 for (link = REG_NOTES (p); link; link = next)
1853 next = XEXP (link, 1);
1855 if (REG_NOTE_KIND (link) != REG_DEAD
1856 || !REG_P (XEXP (link, 0)))
1857 continue;
1859 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1861 remove_note (p, link);
1862 return;
1867 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1869 This handles the case of udivmodXi4 instructions which optimize their
1870 output depending on whether any REG_UNUSED notes are present.
1871 we must make sure that INSN calculates as many results as REDUNDANT_INSN
1872 does. */
1874 static void
1875 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1877 rtx link, next;
1879 for (link = REG_NOTES (insn); link; link = next)
1881 next = XEXP (link, 1);
1883 if (REG_NOTE_KIND (link) != REG_UNUSED
1884 || !REG_P (XEXP (link, 0)))
1885 continue;
1887 if (! find_regno_note (redundant_insn, REG_UNUSED,
1888 REGNO (XEXP (link, 0))))
1889 remove_note (insn, link);
/* Pairs of (new label, sibling label) pushed by get_label_before;
   consumed elsewhere in reorg.  */
static vec <rtx> sibling_labels;

/* Return the label before INSN, or put a new label there.  If SIBLING is
   non-zero, it is another label associated with the new label (if any),
   typically the former target of the jump that will be redirected to
   the new label.  */

static rtx_insn *
get_label_before (rtx_insn *insn, rtx sibling)
{
  rtx_insn *label;

  /* Find an existing label at this point
     or make a new one if there is none.  */
  label = prev_nonnote_insn (insn);

  if (label == 0 || !LABEL_P (label))
    {
      rtx_insn *prev = PREV_INSN (insn);

      label = gen_label_rtx ();
      emit_label_after (label, prev);
      LABEL_NUSES (label) = 0;
      if (sibling)
	{
	  /* Record the association as a (label, sibling) pair.  */
	  sibling_labels.safe_push (label);
	  sibling_labels.safe_push (sibling);
	}
    }
  return label;
}
1925 /* Scan a function looking for insns that need a delay slot and find insns to
1926 put into the delay slot.
1928 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1929 as calls). We do these first since we don't want jump insns (that are
1930 easier to fill) to get the only insns that could be used for non-jump insns.
1931 When it is zero, only try to fill JUMP_INSNs.
1933 When slots are filled in this manner, the insns (including the
1934 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1935 it is possible to tell whether a delay slot has really been filled
1936 or not. `final' knows how to deal with this, by communicating
1937 through FINAL_SEQUENCE. */
1939 static void
1940 fill_simple_delay_slots (int non_jumps_p)
1942 rtx_insn *insn, *trial, *next_trial;
1943 rtx pat;
1944 int i;
1945 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1946 struct resources needed, set;
1947 int slots_to_fill, slots_filled;
1948 rtx_insn_list *delay_list;
1950 for (i = 0; i < num_unfilled_slots; i++)
1952 int flags;
1953 /* Get the next insn to fill. If it has already had any slots assigned,
1954 we can't do anything with it. Maybe we'll improve this later. */
1956 insn = unfilled_slots_base[i];
1957 if (insn == 0
1958 || insn->deleted ()
1959 || (NONJUMP_INSN_P (insn)
1960 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1961 || (JUMP_P (insn) && non_jumps_p)
1962 || (!JUMP_P (insn) && ! non_jumps_p))
1963 continue;
1965 /* It may have been that this insn used to need delay slots, but
1966 now doesn't; ignore in that case. This can happen, for example,
1967 on the HP PA RISC, where the number of delay slots depends on
1968 what insns are nearby. */
1969 slots_to_fill = num_delay_slots (insn);
1971 /* Some machine description have defined instructions to have
1972 delay slots only in certain circumstances which may depend on
1973 nearby insns (which change due to reorg's actions).
1975 For example, the PA port normally has delay slots for unconditional
1976 jumps.
1978 However, the PA port claims such jumps do not have a delay slot
1979 if they are immediate successors of certain CALL_INSNs. This
1980 allows the port to favor filling the delay slot of the call with
1981 the unconditional jump. */
1982 if (slots_to_fill == 0)
1983 continue;
1985 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1986 says how many. After initialization, first try optimizing
1988 call _foo call _foo
1989 nop add %o7,.-L1,%o7
1990 b,a L1
1993 If this case applies, the delay slot of the call is filled with
1994 the unconditional jump. This is done first to avoid having the
1995 delay slot of the call filled in the backward scan. Also, since
1996 the unconditional jump is likely to also have a delay slot, that
1997 insn must exist when it is subsequently scanned.
1999 This is tried on each insn with delay slots as some machines
2000 have insns which perform calls, but are not represented as
2001 CALL_INSNs. */
2003 slots_filled = 0;
2004 delay_list = 0;
/* FLAGS is passed through to the eligible_for_delay checks below.  */
2006 if (JUMP_P (insn))
2007 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2008 else
2009 flags = get_jump_flags (insn, NULL_RTX);
2011 if ((trial = next_active_insn (insn))
2012 && JUMP_P (trial)
2013 && simplejump_p (trial)
2014 && eligible_for_delay (insn, slots_filled, trial, flags)
2015 && no_labels_between_p (insn, trial)
2016 && ! can_throw_internal (trial))
2018 rtx_insn **tmp;
2019 slots_filled++;
2020 delay_list = add_to_delay_list (trial, delay_list);
2022 /* TRIAL may have had its delay slot filled, then unfilled. When
2023 the delay slot is unfilled, TRIAL is placed back on the unfilled
2024 slots obstack. Unfortunately, it is placed on the end of the
2025 obstack, not in its original location. Therefore, we must search
2026 from entry i + 1 to the end of the unfilled slots obstack to
2027 try and find TRIAL. */
2028 tmp = &unfilled_slots_base[i + 1];
2029 while (*tmp != trial && tmp != unfilled_slots_next)
2030 tmp++;
2032 /* Remove the unconditional jump from consideration for delay slot
2033 filling and unthread it. */
2034 if (*tmp == trial)
2035 *tmp = 0;
2037 rtx_insn *next = NEXT_INSN (trial);
2038 rtx_insn *prev = PREV_INSN (trial);
2039 if (prev)
2040 SET_NEXT_INSN (prev) = next;
2041 if (next)
2042 SET_PREV_INSN (next) = prev;
2046 /* Now, scan backwards from the insn to search for a potential
2047 delay-slot candidate. Stop searching when a label or jump is hit.
2049 For each candidate, if it is to go into the delay slot (moved
2050 forward in execution sequence), it must not need or set any resources
2051 that were set by later insns and must not set any resources that
2052 are needed for those insns.
2054 The delay slot insn itself sets resources unless it is a call
2055 (in which case the called routine, not the insn itself, is doing
2056 the setting). */
2058 if (slots_filled < slots_to_fill)
2060 CLEAR_RESOURCE (&needed);
2061 CLEAR_RESOURCE (&set);
2062 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2063 mark_referenced_resources (insn, &needed, false);
2065 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2066 trial = next_trial)
2068 next_trial = prev_nonnote_insn (trial);
2070 /* This must be an INSN or CALL_INSN. */
2071 pat = PATTERN (trial);
2073 /* Stand-alone USE and CLOBBER are just for flow. */
2074 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2075 continue;
2077 /* Check for resource conflict first, to avoid unnecessary
2078 splitting. */
2079 if (! insn_references_resource_p (trial, &set, true)
2080 && ! insn_sets_resource_p (trial, &set, true)
2081 && ! insn_sets_resource_p (trial, &needed, true)
2082 #ifdef HAVE_cc0
2083 /* Can't separate set of cc0 from its use. */
2084 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2085 #endif
2086 && ! can_throw_internal (trial))
2088 trial = try_split (pat, trial, 1);
2089 next_trial = prev_nonnote_insn (trial);
2090 if (eligible_for_delay (insn, slots_filled, trial, flags))
2092 /* In this case, we are searching backward, so if we
2093 find insns to put on the delay list, we want
2094 to put them at the head, rather than the
2095 tail, of the list. */
2097 update_reg_dead_notes (trial, insn);
2098 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2099 trial, delay_list);
2100 update_block (trial, trial);
2101 delete_related_insns (trial);
2102 if (slots_to_fill == ++slots_filled)
2103 break;
2104 continue;
2108 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2109 mark_referenced_resources (trial, &needed, true);
2113 /* If all needed slots haven't been filled, we come here. */
2115 /* Try to optimize case of jumping around a single insn. */
2116 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2117 if (slots_filled != slots_to_fill
2118 && delay_list == 0
2119 && JUMP_P (insn)
2120 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2121 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2123 delay_list = optimize_skip (insn);
2124 if (delay_list)
2125 slots_filled += 1;
2127 #endif
2129 /* Try to get insns from beyond the insn needing the delay slot.
2130 These insns can neither set or reference resources set in insns being
2131 skipped, cannot set resources in the insn being skipped, and, if this
2132 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2133 call might not return).
2135 There used to be code which continued past the target label if
2136 we saw all uses of the target label. This code did not work,
2137 because it failed to account for some instructions which were
2138 both annulled and marked as from the target. This can happen as a
2139 result of optimize_skip. Since this code was redundant with
2140 fill_eager_delay_slots anyways, it was just deleted. */
2142 if (slots_filled != slots_to_fill
2143 /* If this instruction could throw an exception which is
2144 caught in the same function, then it's not safe to fill
2145 the delay slot with an instruction from beyond this
2146 point. For example, consider:
2148 int i = 2;
2150 try {
2151 f();
2152 i = 3;
2153 } catch (...) {}
2155 return i;
2157 Even though `i' is a local variable, we must be sure not
2158 to put `i = 3' in the delay slot if `f' might throw an
2159 exception.
2161 Presumably, we should also check to see if we could get
2162 back to this function via `setjmp'. */
2163 && ! can_throw_internal (insn)
2164 && !JUMP_P (insn))
2166 int maybe_never = 0;
2167 rtx pat, trial_delay;
2169 CLEAR_RESOURCE (&needed);
2170 CLEAR_RESOURCE (&set);
2171 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2172 mark_referenced_resources (insn, &needed, true);
2174 if (CALL_P (insn))
2175 maybe_never = 1;
2177 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2178 trial = next_trial)
2180 next_trial = next_nonnote_insn (trial);
2182 /* This must be an INSN or CALL_INSN. */
2183 pat = PATTERN (trial);
2185 /* Stand-alone USE and CLOBBER are just for flow. */
2186 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2187 continue;
2189 /* If this already has filled delay slots, get the insn needing
2190 the delay slots. */
2191 if (GET_CODE (pat) == SEQUENCE)
2192 trial_delay = XVECEXP (pat, 0, 0);
2193 else
2194 trial_delay = trial;
2196 /* Stop our search when seeing a jump. */
2197 if (JUMP_P (trial_delay))
2198 break;
2200 /* See if we have a resource problem before we try to split. */
2201 if (GET_CODE (pat) != SEQUENCE
2202 && ! insn_references_resource_p (trial, &set, true)
2203 && ! insn_sets_resource_p (trial, &set, true)
2204 && ! insn_sets_resource_p (trial, &needed, true)
2205 #ifdef HAVE_cc0
2206 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2207 #endif
2208 && ! (maybe_never && may_trap_or_fault_p (pat))
2209 && (trial = try_split (pat, trial, 0))
2210 && eligible_for_delay (insn, slots_filled, trial, flags)
2211 && ! can_throw_internal (trial))
2213 next_trial = next_nonnote_insn (trial);
2214 delay_list = add_to_delay_list (trial, delay_list);
2215 #ifdef HAVE_cc0
2216 if (reg_mentioned_p (cc0_rtx, pat))
2217 link_cc0_insns (trial);
2218 #endif
2219 delete_related_insns (trial);
2220 if (slots_to_fill == ++slots_filled)
2221 break;
2222 continue;
2225 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2226 mark_referenced_resources (trial, &needed, true);
2228 /* Ensure we don't put insns between the setting of cc and the
2229 comparison by moving a setting of cc into an earlier delay
2230 slot since these insns could clobber the condition code. */
2231 set.cc = 1;
2233 /* If this is a call, we might not get here. */
2234 if (CALL_P (trial_delay))
2235 maybe_never = 1;
2238 /* If there are slots left to fill and our search was stopped by an
2239 unconditional branch, try the insn at the branch target. We can
2240 redirect the branch if it works.
2242 Don't do this if the insn at the branch target is a branch. */
2243 if (slots_to_fill != slots_filled
2244 && trial
2245 && jump_to_label_p (trial)
2246 && simplejump_p (trial)
2247 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2248 && ! (NONJUMP_INSN_P (next_trial)
2249 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2250 && !JUMP_P (next_trial)
2251 && ! insn_references_resource_p (next_trial, &set, true)
2252 && ! insn_sets_resource_p (next_trial, &set, true)
2253 && ! insn_sets_resource_p (next_trial, &needed, true)
2254 #ifdef HAVE_cc0
2255 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2256 #endif
2257 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2258 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2259 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2260 && ! can_throw_internal (trial))
2262 /* See comment in relax_delay_slots about necessity of using
2263 next_real_insn here. */
2264 rtx_insn *new_label = next_real_insn (next_trial);
2266 if (new_label != 0)
2267 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2268 else
2269 new_label = find_end_label (simple_return_rtx);
2271 if (new_label)
2273 delay_list
2274 = add_to_delay_list (copy_delay_slot_insn (next_trial),
2275 delay_list);
2276 slots_filled++;
2277 reorg_redirect_jump (trial, new_label);
2282 /* If this is an unconditional jump, then try to get insns from the
2283 target of the jump. */
2284 if (JUMP_P (insn)
2285 && simplejump_p (insn)
2286 && slots_filled != slots_to_fill)
2287 delay_list
2288 = fill_slots_from_thread (insn, const_true_rtx,
2289 next_active_insn (JUMP_LABEL (insn)),
2290 NULL, 1, 1,
2291 own_thread_p (JUMP_LABEL (insn),
2292 JUMP_LABEL (insn), 0),
2293 slots_to_fill, &slots_filled,
2294 delay_list);
2296 if (delay_list)
2297 unfilled_slots_base[i]
2298 = emit_delay_sequence (insn, delay_list, slots_filled);
/* All slots filled: this entry needs no further processing.  */
2300 if (slots_to_fill == slots_filled)
2301 unfilled_slots_base[i] = 0;
2303 note_delay_statistics (slots_filled, 0);
2307 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2308 return the ultimate label reached by any such chain of jumps.
2309 Return a suitable return rtx if the chain ultimately leads to a
2310 return instruction.
2311 If LABEL is not followed by a jump, return LABEL.
2312 If the chain loops or we can't find end, return LABEL,
2313 since that tells caller to avoid changing the insn.
2314 If the returned label is obtained by following a crossing jump,
2315 set *CROSSING to true, otherwise set it to false. */
2317 static rtx
2318 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2320 rtx_insn *insn;
2321 rtx_insn *next;
2322 int depth;
2324 *crossing = false;
2325 if (ANY_RETURN_P (label))
2326 return label;
2328 rtx_insn *value = as_a <rtx_insn *> (label);
/* Chase at most 10 jumps; the depth == 10 check below treats a longer
   chain as "give up" and returns LABEL unchanged.  */
2330 for (depth = 0;
2331 (depth < 10
2332 && (insn = next_active_insn (value)) != 0
2333 && JUMP_P (insn)
2334 && JUMP_LABEL (insn) != NULL_RTX
2335 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2336 || ANY_RETURN_P (PATTERN (insn)))
2337 && (next = NEXT_INSN (insn))
2338 && BARRIER_P (next));
2339 depth++)
2341 rtx this_label_or_return = JUMP_LABEL (insn);
2343 /* If we have found a cycle, make the insn jump to itself. */
2344 if (this_label_or_return == label)
2345 return label;
2347 /* Cannot follow returns and cannot look through tablejumps. */
2348 if (ANY_RETURN_P (this_label_or_return))
2349 return this_label_or_return;
2351 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2352 if (NEXT_INSN (this_label)
2353 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2354 break;
2356 if (!targetm.can_follow_jump (jump, insn))
2357 break;
/* Once any followed jump is a crossing jump, the result crosses.  */
2358 if (!*crossing)
2359 *crossing = CROSSING_JUMP_P (jump);
2360 value = this_label;
2362 if (depth == 10)
2363 return label;
2364 return value;
2367 /* Try to find insns to place in delay slots.
2369 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2370 or is an unconditional branch if CONDITION is const_true_rtx.
2371 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2373 THREAD is a flow-of-control, either the insns to be executed if the
2374 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2376 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2377 to see if any potential delay slot insns set things needed there.
2379 LIKELY is nonzero if it is extremely likely that the branch will be
2380 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2381 end of a loop back up to the top.
2383 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2384 thread. I.e., it is the fallthrough code of our jump or the target of the
2385 jump when we are the only jump going there.
2387 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2388 case, we can only take insns from the head of the thread for our delay
2389 slot. We then adjust the jump to point after the insns we have taken. */
2391 static rtx_insn_list *
2392 fill_slots_from_thread (rtx_insn *insn, rtx condition, rtx thread_or_return,
2393 rtx opposite_thread, int likely,
2394 int thread_if_true,
2395 int own_thread, int slots_to_fill,
2396 int *pslots_filled, rtx_insn_list *delay_list)
2398 rtx new_thread;
2399 struct resources opposite_needed, set, needed;
2400 rtx_insn *trial;
2401 int lose = 0;
2402 int must_annul = 0;
2403 int flags;
2405 /* Validate our arguments. */
2406 gcc_assert (condition != const_true_rtx || thread_if_true);
2407 gcc_assert (own_thread || thread_if_true);
2409 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2411 /* If our thread is the end of subroutine, we can't get any delay
2412 insns from that. */
2413 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2414 return delay_list;
2416 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2418 /* If this is an unconditional branch, nothing is needed at the
2419 opposite thread. Otherwise, compute what is needed there. */
2420 if (condition == const_true_rtx)
2421 CLEAR_RESOURCE (&opposite_needed);
2422 else
2423 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2425 /* If the insn at THREAD can be split, do it here to avoid having to
2426 update THREAD and NEW_THREAD if it is done in the loop below. Also
2427 initialize NEW_THREAD. */
2429 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2431 /* Scan insns at THREAD. We are looking for an insn that can be removed
2432 from THREAD (it neither sets nor references resources that were set
2433 ahead of it and it doesn't set anything needs by the insns ahead of
2434 it) and that either can be placed in an annulling insn or aren't
2435 needed at OPPOSITE_THREAD. */
2437 CLEAR_RESOURCE (&needed);
2438 CLEAR_RESOURCE (&set);
2440 /* If we do not own this thread, we must stop as soon as we find
2441 something that we can't put in a delay slot, since all we can do
2442 is branch into THREAD at a later point. Therefore, labels stop
2443 the search if this is not the `true' thread. */
2445 for (trial = thread;
2446 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2447 trial = next_nonnote_insn (trial))
2449 rtx pat, old_trial;
2451 /* If we have passed a label, we no longer own this thread. */
2452 if (LABEL_P (trial))
2454 own_thread = 0;
2455 continue;
2458 pat = PATTERN (trial);
2459 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2460 continue;
2462 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2463 don't separate or copy insns that set and use CC0. */
2464 if (! insn_references_resource_p (trial, &set, true)
2465 && ! insn_sets_resource_p (trial, &set, true)
2466 && ! insn_sets_resource_p (trial, &needed, true)
2467 #ifdef HAVE_cc0
2468 && ! (reg_mentioned_p (cc0_rtx, pat)
2469 && (! own_thread || ! sets_cc0_p (pat)))
2470 #endif
2471 && ! can_throw_internal (trial))
2473 rtx prior_insn;
2475 /* If TRIAL is redundant with some insn before INSN, we don't
2476 actually need to add it to the delay list; we can merely pretend
2477 we did. */
2478 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2480 fix_reg_dead_note (prior_insn, insn);
2481 if (own_thread)
2483 update_block (trial, thread);
2484 if (trial == thread)
2486 thread = next_active_insn (thread);
2487 if (new_thread == trial)
2488 new_thread = thread;
2491 delete_related_insns (trial);
2493 else
2495 update_reg_unused_notes (prior_insn, trial);
2496 new_thread = next_active_insn (trial);
2499 continue;
2502 /* There are two ways we can win: If TRIAL doesn't set anything
2503 needed at the opposite thread and can't trap, or if it can
2504 go into an annulled delay slot. */
2505 if (!must_annul
2506 && (condition == const_true_rtx
2507 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2508 && ! may_trap_or_fault_p (pat)
2509 && ! RTX_FRAME_RELATED_P (trial))))
2511 old_trial = trial;
2512 trial = try_split (pat, trial, 0);
2513 if (new_thread == old_trial)
2514 new_thread = trial;
2515 if (thread == old_trial)
2516 thread = trial;
2517 pat = PATTERN (trial);
2518 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2519 goto winner;
2521 else if (0
2522 #ifdef ANNUL_IFTRUE_SLOTS
2523 || ! thread_if_true
2524 #endif
2525 #ifdef ANNUL_IFFALSE_SLOTS
2526 || thread_if_true
2527 #endif
2530 old_trial = trial;
2531 trial = try_split (pat, trial, 0);
2532 if (new_thread == old_trial)
2533 new_thread = trial;
2534 if (thread == old_trial)
2535 thread = trial;
2536 pat = PATTERN (trial);
2537 if ((must_annul || delay_list == NULL) && (thread_if_true
2538 ? check_annul_list_true_false (0, delay_list)
2539 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2540 : check_annul_list_true_false (1, delay_list)
2541 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2543 rtx_insn *temp;
2545 must_annul = 1;
2546 winner:
2548 #ifdef HAVE_cc0
2549 if (reg_mentioned_p (cc0_rtx, pat))
2550 link_cc0_insns (trial);
2551 #endif
2553 /* If we own this thread, delete the insn. If this is the
2554 destination of a branch, show that a basic block status
2555 may have been updated. In any case, mark the new
2556 starting point of this thread. */
2557 if (own_thread)
2559 rtx note;
2561 update_block (trial, thread);
2562 if (trial == thread)
2564 thread = next_active_insn (thread);
2565 if (new_thread == trial)
2566 new_thread = thread;
2569 /* We are moving this insn, not deleting it. We must
2570 temporarily increment the use count on any referenced
2571 label lest it be deleted by delete_related_insns. */
2572 for (note = REG_NOTES (trial);
2573 note != NULL_RTX;
2574 note = XEXP (note, 1))
2575 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2576 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2578 /* REG_LABEL_OPERAND could be
2579 NOTE_INSN_DELETED_LABEL too. */
2580 if (LABEL_P (XEXP (note, 0)))
2581 LABEL_NUSES (XEXP (note, 0))++;
2582 else
2583 gcc_assert (REG_NOTE_KIND (note)
2584 == REG_LABEL_OPERAND);
2586 if (jump_to_label_p (trial))
2587 LABEL_NUSES (JUMP_LABEL (trial))++;
2589 delete_related_insns (trial);
2591 for (note = REG_NOTES (trial);
2592 note != NULL_RTX;
2593 note = XEXP (note, 1))
2594 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2595 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2597 /* REG_LABEL_OPERAND could be
2598 NOTE_INSN_DELETED_LABEL too. */
2599 if (LABEL_P (XEXP (note, 0)))
2600 LABEL_NUSES (XEXP (note, 0))--;
2601 else
2602 gcc_assert (REG_NOTE_KIND (note)
2603 == REG_LABEL_OPERAND);
2605 if (jump_to_label_p (trial))
2606 LABEL_NUSES (JUMP_LABEL (trial))--;
2608 else
2609 new_thread = next_active_insn (trial);
2611 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2612 if (thread_if_true)
2613 INSN_FROM_TARGET_P (temp) = 1;
2615 delay_list = add_to_delay_list (temp, delay_list);
2617 if (slots_to_fill == ++(*pslots_filled))
2619 /* Even though we have filled all the slots, we
2620 may be branching to a location that has a
2621 redundant insn. Skip any if so. */
2622 while (new_thread && ! own_thread
2623 && ! insn_sets_resource_p (new_thread, &set, true)
2624 && ! insn_sets_resource_p (new_thread, &needed,
2625 true)
2626 && ! insn_references_resource_p (new_thread,
2627 &set, true)
2628 && (prior_insn
2629 = redundant_insn (new_thread, insn,
2630 delay_list)))
2632 /* We know we do not own the thread, so no need
2633 to call update_block and delete_insn. */
2634 fix_reg_dead_note (prior_insn, insn);
2635 update_reg_unused_notes (prior_insn, new_thread);
2636 new_thread = next_active_insn (new_thread);
2638 break;
2641 continue;
2646 /* This insn can't go into a delay slot. */
2647 lose = 1;
2648 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2649 mark_referenced_resources (trial, &needed, true);
2651 /* Ensure we don't put insns between the setting of cc and the comparison
2652 by moving a setting of cc into an earlier delay slot since these insns
2653 could clobber the condition code. */
2654 set.cc = 1;
2656 /* If this insn is a register-register copy and the next insn has
2657 a use of our destination, change it to use our source. That way,
2658 it will become a candidate for our delay slot the next time
2659 through this loop. This case occurs commonly in loops that
2660 scan a list.
2662 We could check for more complex cases than those tested below,
2663 but it doesn't seem worth it. It might also be a good idea to try
2664 to swap the two insns. That might do better.
2666 We can't do this if the next insn modifies our destination, because
2667 that would make the replacement into the insn invalid. We also can't
2668 do this if it modifies our source, because it might be an earlyclobber
2669 operand. This latter test also prevents updating the contents of
2670 a PRE_INC. We also can't do this if there's overlap of source and
2671 destination. Overlap may happen for larger-than-register-size modes. */
2673 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2674 && REG_P (SET_SRC (pat))
2675 && REG_P (SET_DEST (pat))
2676 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2678 rtx next = next_nonnote_insn (trial);
2680 if (next && NONJUMP_INSN_P (next)
2681 && GET_CODE (PATTERN (next)) != USE
2682 && ! reg_set_p (SET_DEST (pat), next)
2683 && ! reg_set_p (SET_SRC (pat), next)
2684 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2685 && ! modified_in_p (SET_DEST (pat), next))
2686 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2690 /* If we stopped on a branch insn that has delay slots, see if we can
2691 steal some of the insns in those slots. */
2692 if (trial && NONJUMP_INSN_P (trial)
2693 && GET_CODE (PATTERN (trial)) == SEQUENCE
2694 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2696 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2697 /* If this is the `true' thread, we will want to follow the jump,
2698 so we can only do this if we have taken everything up to here. */
2699 if (thread_if_true && trial == new_thread)
2701 delay_list
2702 = steal_delay_list_from_target (insn, condition, sequence,
2703 delay_list, &set, &needed,
2704 &opposite_needed, slots_to_fill,
2705 pslots_filled, &must_annul,
2706 &new_thread);
2707 /* If we owned the thread and are told that it branched
2708 elsewhere, make sure we own the thread at the new location. */
2709 if (own_thread && trial != new_thread)
2710 own_thread = own_thread_p (new_thread, new_thread, 0);
2712 else if (! thread_if_true)
2713 delay_list
2714 = steal_delay_list_from_fallthrough (insn, condition,
2715 sequence,
2716 delay_list, &set, &needed,
2717 &opposite_needed, slots_to_fill,
2718 pslots_filled, &must_annul);
2721 /* If we haven't found anything for this delay slot and it is very
2722 likely that the branch will be taken, see if the insn at our target
2723 increments or decrements a register with an increment that does not
2724 depend on the destination register. If so, try to place the opposite
2725 arithmetic insn after the jump insn and put the arithmetic insn in the
2726 delay slot. If we can't do this, return. */
2727 if (delay_list == 0 && likely
2728 && new_thread && !ANY_RETURN_P (new_thread)
2729 && NONJUMP_INSN_P (new_thread)
2730 && !RTX_FRAME_RELATED_P (new_thread)
2731 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2732 && asm_noperands (PATTERN (new_thread)) < 0)
2734 rtx pat = PATTERN (new_thread);
2735 rtx dest;
2736 rtx src;
2738 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2739 above. */
2740 trial = as_a <rtx_insn *> (new_thread);
2741 pat = PATTERN (trial);
2743 if (!NONJUMP_INSN_P (trial)
2744 || GET_CODE (pat) != SET
2745 || ! eligible_for_delay (insn, 0, trial, flags)
2746 || can_throw_internal (trial))
2747 return 0;
2749 dest = SET_DEST (pat), src = SET_SRC (pat);
2750 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2751 && rtx_equal_p (XEXP (src, 0), dest)
2752 && (!FLOAT_MODE_P (GET_MODE (src))
2753 || flag_unsafe_math_optimizations)
2754 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2755 && ! side_effects_p (pat))
2757 rtx other = XEXP (src, 1);
2758 rtx new_arith;
2759 rtx_insn *ninsn;
2761 /* If this is a constant adjustment, use the same code with
2762 the negated constant. Otherwise, reverse the sense of the
2763 arithmetic. */
2764 if (CONST_INT_P (other))
2765 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2766 negate_rtx (GET_MODE (src), other));
2767 else
2768 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2769 GET_MODE (src), dest, other);
2771 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2772 insn);
/* Reject the compensating insn if it does not match any pattern or
   fails its constraints; undo the emission in that case.  */
2774 if (recog_memoized (ninsn) < 0
2775 || (extract_insn (ninsn),
2776 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2778 delete_related_insns (ninsn);
2779 return 0;
2782 if (own_thread)
2784 update_block (trial, thread);
2785 if (trial == thread)
2787 thread = next_active_insn (thread);
2788 if (new_thread == trial)
2789 new_thread = thread;
2791 delete_related_insns (trial);
2793 else
2794 new_thread = next_active_insn (trial);
2796 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2797 if (thread_if_true)
2798 INSN_FROM_TARGET_P (ninsn) = 1;
2800 delay_list = add_to_delay_list (ninsn, NULL);
2801 (*pslots_filled)++;
/* If any accepted insn required annulling, mark the branch.  */
2805 if (delay_list && must_annul)
2806 INSN_ANNULLED_BRANCH_P (insn) = 1;
2808 /* If we are to branch into the middle of this thread, find an appropriate
2809 label or make a new one if none, and redirect INSN to it. If we hit the
2810 end of the function, use the end-of-function label. */
2811 if (new_thread != thread)
2813 rtx label;
2814 bool crossing = false;
2816 gcc_assert (thread_if_true);
2818 if (new_thread && simplejump_or_return_p (new_thread)
2819 && redirect_with_delay_list_safe_p (insn,
2820 JUMP_LABEL (new_thread),
2821 delay_list))
2822 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2823 &crossing)
2825 if (ANY_RETURN_P (new_thread))
2826 label = find_end_label (new_thread);
2827 else if (LABEL_P (new_thread))
2828 label = new_thread;
2829 else
2830 label = get_label_before (as_a <rtx_insn *> (new_thread),
2831 JUMP_LABEL (insn));
2833 if (label)
2835 reorg_redirect_jump (insn, label);
2836 if (crossing)
2837 CROSSING_JUMP_P (insn) = 1;
2841 return delay_list;
2844 /* Make another attempt to find insns to place in delay slots.
2846 We previously looked for insns located in front of the delay insn
2847 and, for non-jump delay insns, located behind the delay insn.
2849 Here only try to schedule jump insns and try to move insns from either
2850 the target or the following insns into the delay slot. If annulling is
2851 supported, we will be likely to do this. Otherwise, we can do this only
2852 if safe. */
2854 static void
2855 fill_eager_delay_slots (void)
2857 rtx_insn *insn;
2858 int i;
2859 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2861 for (i = 0; i < num_unfilled_slots; i++)
2863 rtx condition;
2864 rtx target_label, insn_at_target;
2865 rtx_insn *fallthrough_insn;
2866 rtx_insn_list *delay_list = 0;
2867 int own_target;
2868 int own_fallthrough;
2869 int prediction, slots_to_fill, slots_filled;
/* Only (conditional) jump insns are handled in this pass.  */
2871 insn = unfilled_slots_base[i];
2872 if (insn == 0
2873 || insn->deleted ()
2874 || !JUMP_P (insn)
2875 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2876 continue;
2878 slots_to_fill = num_delay_slots (insn);
2879 /* Some machine description have defined instructions to have
2880 delay slots only in certain circumstances which may depend on
2881 nearby insns (which change due to reorg's actions).
2883 For example, the PA port normally has delay slots for unconditional
2884 jumps.
2886 However, the PA port claims such jumps do not have a delay slot
2887 if they are immediate successors of certain CALL_INSNs. This
2888 allows the port to favor filling the delay slot of the call with
2889 the unconditional jump. */
2890 if (slots_to_fill == 0)
2891 continue;
2893 slots_filled = 0;
2894 target_label = JUMP_LABEL (insn);
2895 condition = get_branch_condition (insn, target_label);
2897 if (condition == 0)
2898 continue;
2900 /* Get the next active fallthrough and target insns and see if we own
2901 them. Then see whether the branch is likely true. We don't need
2902 to do a lot of this for unconditional branches. */
2904 insn_at_target = first_active_target_insn (target_label);
2905 own_target = own_thread_p (target_label, target_label, 0);
2907 if (condition == const_true_rtx)
2909 own_fallthrough = 0;
2910 fallthrough_insn = 0;
2911 prediction = 2;
2913 else
2915 fallthrough_insn = next_active_insn (insn);
2916 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2917 prediction = mostly_true_jump (insn);
2920 /* If this insn is expected to branch, first try to get insns from our
2921 target, then our fallthrough insns. If it is not expected to branch,
2922 try the other order. */
2924 if (prediction > 0)
2926 delay_list
2927 = fill_slots_from_thread (insn, condition, insn_at_target,
2928 fallthrough_insn, prediction == 2, 1,
2929 own_target,
2930 slots_to_fill, &slots_filled, delay_list)
2932 if (delay_list == 0 && own_fallthrough)
2934 /* Even though we didn't find anything for delay slots,
2935 we might have found a redundant insn which we deleted
2936 from the thread that was filled. So we have to recompute
2937 the next insn at the target. */
2938 target_label = JUMP_LABEL (insn);
2939 insn_at_target = first_active_target_insn (target_label);
2941 delay_list
2942 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2943 insn_at_target, 0, 0,
2944 own_fallthrough,
2945 slots_to_fill, &slots_filled,
2946 delay_list);
2949 else
2951 if (own_fallthrough)
2952 delay_list
2953 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2954 insn_at_target, 0, 0,
2955 own_fallthrough,
2956 slots_to_fill, &slots_filled,
2957 delay_list);
2959 if (delay_list == 0)
2960 delay_list
2961 = fill_slots_from_thread (insn, condition, insn_at_target,
2962 next_active_insn (insn), 0, 1,
2963 own_target,
2964 slots_to_fill, &slots_filled,
2965 delay_list);
/* Attach any filled slots to the insn via emit_delay_sequence.  */
2968 if (delay_list)
2969 unfilled_slots_base[i]
2970 = emit_delay_sequence (insn, delay_list, slots_filled);
2972 if (slots_to_fill == slots_filled)
2973 unfilled_slots_base[i] = 0;
2975 note_delay_statistics (slots_filled, 1);
2979 static void delete_computation (rtx insn);
2981 /* Recursively delete prior insns that compute the value (used only by INSN
2982 which the caller is deleting) stored in the register mentioned by NOTE
2983 which is a REG_DEAD note associated with INSN. */
2985 static void
2986 delete_prior_computation (rtx note, rtx insn)
2988 rtx our_prev;
2989 rtx reg = XEXP (note, 0);
/* REG is the register whose death NOTE records; walk backwards over
   ordinary insns and calls looking for the insn that computed it.  */
2991 for (our_prev = prev_nonnote_insn (insn);
2992 our_prev && (NONJUMP_INSN_P (our_prev)
2993 || CALL_P (our_prev));
2994 our_prev = prev_nonnote_insn (our_prev))
2996 rtx pat = PATTERN (our_prev);
2998 /* If we reach a CALL which is not calling a const function
2999 or the callee pops the arguments, then give up. */
3000 if (CALL_P (our_prev)
3001 && (! RTL_CONST_CALL_P (our_prev)
3002 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
3003 break;
3005 /* If we reach a SEQUENCE, it is too complex to try to
3006 do anything with it, so give up. We can be run during
3007 and after reorg, so SEQUENCE rtl can legitimately show
3008 up here. */
3009 if (GET_CODE (pat) == SEQUENCE)
3010 break;
3012 if (GET_CODE (pat) == USE
3013 && NONJUMP_INSN_P (XEXP (pat, 0)))
3014 /* reorg creates USEs that look like this. We leave them
3015 alone because reorg needs them for its own purposes. */
3016 break;
/* OUR_PREV sets REG.  Decide whether the whole insn may be deleted,
   or whether only part of a multi-word register is now unused.  */
3018 if (reg_set_p (reg, pat))
3020 if (side_effects_p (pat) && !CALL_P (our_prev))
3021 break;
3023 if (GET_CODE (pat) == PARALLEL)
3025 /* If we find a SET of something else, we can't
3026 delete the insn. */
3028 int i;
3030 for (i = 0; i < XVECLEN (pat, 0); i++)
3032 rtx part = XVECEXP (pat, 0, i);
3034 if (GET_CODE (part) == SET
3035 && SET_DEST (part) != reg)
3036 break;
3039 if (i == XVECLEN (pat, 0))
3040 delete_computation (our_prev);
3042 else if (GET_CODE (pat) == SET
3043 && REG_P (SET_DEST (pat)))
3045 int dest_regno = REGNO (SET_DEST (pat));
3046 int dest_endregno = END_REGNO (SET_DEST (pat));
3047 int regno = REGNO (reg);
3048 int endregno = END_REGNO (reg);
/* The SET writes a subrange of the dying register: the whole
   insn is dead.  */
3050 if (dest_regno >= regno
3051 && dest_endregno <= endregno)
3052 delete_computation (our_prev);
3054 /* We may have a multi-word hard register and some, but not
3055 all, of the words of the register are needed in subsequent
3056 insns. Write REG_UNUSED notes for those parts that were not
3057 needed. */
3058 else if (dest_regno <= regno
3059 && dest_endregno >= endregno)
3061 int i;
3063 add_reg_note (our_prev, REG_UNUSED, reg);
/* Only delete the setter once every word it sets carries a
   REG_UNUSED note.  */
3065 for (i = dest_regno; i < dest_endregno; i++)
3066 if (! find_regno_note (our_prev, REG_UNUSED, i))
3067 break;
3069 if (i == dest_endregno)
3070 delete_computation (our_prev);
/* REG was set here, so earlier insns cannot feed this death; stop.  */
3074 break;
3077 /* If PAT references the register that dies here, it is an
3078 additional use. Hence any prior SET isn't dead. However, this
3079 insn becomes the new place for the REG_DEAD note. */
3080 if (reg_overlap_mentioned_p (reg, pat))
3082 XEXP (note, 1) = REG_NOTES (our_prev);
3083 REG_NOTES (our_prev) = note;
3084 break;
3089 /* Delete INSN and recursively delete insns that compute values used only
3090 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3092 Look at all our REG_DEAD notes. If a previous insn does nothing other
3093 than set a register that dies in this insn, we can delete that insn
3094 as well.
3096 On machines with CC0, if CC0 is used in this insn, we may be able to
3097 delete the insn that set it. */
3099 static void
3100 delete_computation (rtx insn)
3102 rtx note, next;
3104 #ifdef HAVE_cc0
3105 if (reg_referenced_p (cc0_rtx, PATTERN (insn)))
3107 rtx prev = prev_nonnote_insn (insn);
3108 /* We assume that at this stage
3109 CC's are always set explicitly
3110 and always immediately before the jump that
3111 will use them. So if the previous insn
3112 exists to set the CC's, delete it
3113 (unless it performs auto-increments, etc.). */
3114 if (prev && NONJUMP_INSN_P (prev)
3115 && sets_cc0_p (PATTERN (prev)))
3117 if (sets_cc0_p (PATTERN (prev)) > 0
3118 && ! side_effects_p (PATTERN (prev)))
3119 delete_computation (prev);
3120 else
3121 /* Otherwise, show that cc0 won't be used. */
3122 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3125 #endif
/* Each REG_DEAD note names a register used only by INSN; try to
   delete the insns that computed it.  NEXT is saved first because
   delete_prior_computation may relocate NOTE to another insn.  */
3127 for (note = REG_NOTES (insn); note; note = next)
3129 next = XEXP (note, 1);
3131 if (REG_NOTE_KIND (note) != REG_DEAD
3132 /* Verify that the REG_NOTE is legitimate. */
3133 || !REG_P (XEXP (note, 0)))
3134 continue;
3136 delete_prior_computation (note, insn);
/* Finally delete INSN itself.  */
3139 delete_related_insns (insn);
3142 /* If all INSN does is set the pc, delete it,
3143 and delete the insn that set the condition codes for it
3144 if that's what the previous thing was. */
3146 static void
3147 delete_jump (rtx_insn *insn)
3149 rtx set = single_set (insn);
/* Only delete when the insn's single effect is to set the pc; any
   other side effect means it is not a pure jump.  */
3151 if (set && GET_CODE (SET_DEST (set)) == PC)
3152 delete_computation (insn);
/* Return the label, if any, immediately preceding the next active insn
   after X.  Walking backwards from that insn, stop and return NULL if
   SCAN_LIMIT (or the start of the insn chain) is reached before a
   label is found.  */
3155 static rtx_insn *
3156 label_before_next_insn (rtx x, rtx scan_limit)
3158 rtx_insn *insn = next_active_insn (x);
3159 while (insn)
3161 insn = PREV_INSN (insn);
3162 if (insn == scan_limit || insn == NULL_RTX)
3163 return NULL;
3164 if (LABEL_P (insn))
3165 break;
3167 return insn;
3171 /* Once we have tried two ways to fill a delay slot, make a pass over the
3172 code to try to improve the results and to do such things as more jump
3173 threading. */
3175 static void
3176 relax_delay_slots (rtx_insn *first)
3178 rtx_insn *insn, *next;
3179 rtx_sequence *pat;
3180 rtx trial;
3181 rtx_insn *delay_insn;
3182 rtx target_label;
3184 /* Look at every JUMP_INSN and see if we can improve it. */
3185 for (insn = first; insn; insn = next)
3187 rtx_insn *other;
3188 bool crossing;
3190 next = next_active_insn (insn);
3192 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3193 the next insn, or jumps to a label that is not the last of a
3194 group of consecutive labels. */
3195 if (JUMP_P (insn)
3196 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3197 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3199 target_label
3200 = skip_consecutive_labels (follow_jumps (target_label, insn,
3201 &crossing));
3202 if (ANY_RETURN_P (target_label))
3203 target_label = find_end_label (target_label);
/* A jump to the immediately following active insn is a no-op;
   delete it outright.  */
3205 if (target_label && next_active_insn (target_label) == next
3206 && ! condjump_in_parallel_p (insn))
3208 delete_jump (insn);
3209 continue;
3212 if (target_label && target_label != JUMP_LABEL (insn))
3214 reorg_redirect_jump (insn, target_label);
3215 if (crossing)
3216 CROSSING_JUMP_P (insn) = 1;
3219 /* See if this jump conditionally branches around an unconditional
3220 jump. If so, invert this jump and point it to the target of the
3221 second jump. */
3222 if (next && simplejump_or_return_p (next)
3223 && any_condjump_p (insn)
3224 && target_label
3225 && next_active_insn (target_label) == next_active_insn (next)
3226 && no_labels_between_p (insn, next))
3228 rtx label = JUMP_LABEL (next);
3230 /* Be careful how we do this to avoid deleting code or
3231 labels that are momentarily dead. See similar optimization
3232 in jump.c.
3234 We also need to ensure we properly handle the case when
3235 invert_jump fails. */
3237 ++LABEL_NUSES (target_label);
3238 if (!ANY_RETURN_P (label))
3239 ++LABEL_NUSES (label);
3241 if (invert_jump (insn, label, 1))
3243 delete_related_insns (next);
3244 next = insn;
3247 if (!ANY_RETURN_P (label))
3248 --LABEL_NUSES (label);
3250 if (--LABEL_NUSES (target_label) == 0)
3251 delete_related_insns (target_label);
3253 continue;
3257 /* If this is an unconditional jump and the previous insn is a
3258 conditional jump, try reversing the condition of the previous
3259 insn and swapping our targets. The next pass might be able to
3260 fill the slots.
3262 Don't do this if we expect the conditional branch to be true, because
3263 we would then be making the more common case longer. */
3265 if (simplejump_or_return_p (insn)
3266 && (other = prev_active_insn (insn)) != 0
3267 && any_condjump_p (other)
3268 && no_labels_between_p (other, insn)
3269 && 0 > mostly_true_jump (other))
3271 rtx other_target = JUMP_LABEL (other);
3272 target_label = JUMP_LABEL (insn);
3274 if (invert_jump (other, target_label, 0))
3275 reorg_redirect_jump (insn, other_target);
3278 /* Now look only at cases where we have a filled delay slot. */
3279 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3280 continue;
/* From here on INSN is a SEQUENCE: element 0 is the branch/jump,
   elements 1.. are the insns filling its delay slots.  */
3282 pat = as_a <rtx_sequence *> (PATTERN (insn));
3283 delay_insn = pat->insn (0);
3285 /* See if the first insn in the delay slot is redundant with some
3286 previous insn. Remove it from the delay slot if so; then set up
3287 to reprocess this insn. */
3288 if (redundant_insn (pat->insn (1), delay_insn, 0))
3290 update_block (pat->insn (1), insn);
3291 delete_from_delay_slot (pat->insn (1));
3292 next = prev_active_insn (next);
3293 continue;
3296 /* See if we have a RETURN insn with a filled delay slot followed
3297 by a RETURN insn with an unfilled delay slot. If so, we can delete
3298 the first RETURN (but not its delay insn). This gives the same
3299 effect in fewer instructions.
3301 Only do so if optimizing for size since this results in slower, but
3302 smaller code. */
3303 if (optimize_function_for_size_p (cfun)
3304 && ANY_RETURN_P (PATTERN (delay_insn))
3305 && next
3306 && JUMP_P (next)
3307 && PATTERN (next) == PATTERN (delay_insn))
3309 rtx_insn *after;
3310 int i;
3312 /* Delete the RETURN and just execute the delay list insns.
3314 We do this by deleting the INSN containing the SEQUENCE, then
3315 re-emitting the insns separately, and then deleting the RETURN.
3316 This allows the count of the jump target to be properly
3317 decremented.
3319 Note that we need to change the INSN_UID of the re-emitted insns
3320 since it is used to hash the insns for mark_target_live_regs and
3321 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3323 Clear the from target bit, since these insns are no longer
3324 in delay slots. */
3325 for (i = 0; i < XVECLEN (pat, 0); i++)
3326 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3328 trial = PREV_INSN (insn);
3329 delete_related_insns (insn);
3330 gcc_assert (GET_CODE (pat) == SEQUENCE);
3331 add_insn_after (delay_insn, trial, NULL);
3332 after = delay_insn;
3333 for (i = 1; i < pat->len (); i++)
3334 after = emit_copy_of_insn_after (pat->insn (i), after);
3335 delete_scheduled_jump (delay_insn);
3336 continue;
3339 /* Now look only at the cases where we have a filled JUMP_INSN. */
3340 if (!JUMP_P (delay_insn)
3341 || !(condjump_p (delay_insn) || condjump_in_parallel_p (delay_insn)))
3342 continue;
3344 target_label = JUMP_LABEL (delay_insn);
3345 if (target_label && ANY_RETURN_P (target_label))
3346 continue;
3348 /* If this jump goes to another unconditional jump, thread it, but
3349 don't convert a jump into a RETURN here. */
3350 trial = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
3351 &crossing));
3352 if (ANY_RETURN_P (trial))
3353 trial = find_end_label (trial);
3355 if (trial && trial != target_label
3356 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3358 reorg_redirect_jump (delay_insn, trial);
3359 target_label = trial;
3360 if (crossing)
3361 CROSSING_JUMP_P (insn) = 1;
3364 /* If the first insn at TARGET_LABEL is redundant with a previous
3365 insn, redirect the jump to the following insn and process again.
3366 We use next_real_insn instead of next_active_insn so we
3367 don't skip USE-markers, or we'll end up with incorrect
3368 liveness info. */
3369 trial = next_real_insn (target_label);
3370 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3371 && redundant_insn (trial, insn, 0)
3372 && ! can_throw_internal (trial))
3374 /* Figure out where to emit the special USE insn so we don't
3375 later incorrectly compute register live/death info. */
3376 rtx_insn *tmp = next_active_insn (trial);
3377 if (tmp == 0)
3378 tmp = find_end_label (simple_return_rtx);
3380 if (tmp)
3382 /* Insert the special USE insn and update dataflow info.
3383 We know "trial" is an insn here as it is the output of
3384 next_real_insn () above. */
3385 update_block (as_a <rtx_insn *> (trial), tmp);
3387 /* Now emit a label before the special USE insn, and
3388 redirect our jump to the new label. */
3389 target_label = get_label_before (PREV_INSN (tmp), target_label);
3390 reorg_redirect_jump (delay_insn, target_label);
3391 next = insn;
3392 continue;
3396 /* Similarly, if it is an unconditional jump with one insn in its
3397 delay list and that insn is redundant, thread the jump. */
3398 rtx_sequence *trial_seq =
3399 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3400 if (trial_seq
3401 && trial_seq->len () == 2
3402 && JUMP_P (trial_seq->insn (0))
3403 && simplejump_or_return_p (trial_seq->insn (0))
3404 && redundant_insn (trial_seq->insn (1), insn, 0))
3406 target_label = JUMP_LABEL (trial_seq->insn (0));
3407 if (ANY_RETURN_P (target_label))
3408 target_label = find_end_label (target_label);
3410 if (target_label
3411 && redirect_with_delay_slots_safe_p (delay_insn, target_label,
3412 insn))
3414 update_block (trial_seq->insn (1), insn);
3415 reorg_redirect_jump (delay_insn, target_label);
3416 next = insn;
3417 continue;
3421 /* See if we have a simple (conditional) jump that is useless. */
3422 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3423 && ! condjump_in_parallel_p (delay_insn)
3424 && prev_active_insn (target_label) == insn
3425 && ! BARRIER_P (prev_nonnote_insn (target_label))
3426 #ifdef HAVE_cc0
3427 /* If the last insn in the delay slot sets CC0 for some insn,
3428 various code assumes that it is in a delay slot. We could
3429 put it back where it belonged and delete the register notes,
3430 but it doesn't seem worthwhile in this uncommon case. */
3431 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3432 REG_CC_USER, NULL_RTX)
3433 #endif
3436 rtx_insn *after;
3437 int i;
3439 /* All this insn does is execute its delay list and jump to the
3440 following insn. So delete the jump and just execute the delay
3441 list insns.
3443 We do this by deleting the INSN containing the SEQUENCE, then
3444 re-emitting the insns separately, and then deleting the jump.
3445 This allows the count of the jump target to be properly
3446 decremented.
3448 Note that we need to change the INSN_UID of the re-emitted insns
3449 since it is used to hash the insns for mark_target_live_regs and
3450 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3452 Clear the from target bit, since these insns are no longer
3453 in delay slots. */
3454 for (i = 0; i < XVECLEN (pat, 0); i++)
3455 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3457 trial = PREV_INSN (insn);
3458 delete_related_insns (insn);
3459 gcc_assert (GET_CODE (pat) == SEQUENCE);
3460 add_insn_after (delay_insn, trial, NULL);
3461 after = delay_insn;
3462 for (i = 1; i < pat->len (); i++)
3463 after = emit_copy_of_insn_after (pat->insn (i), after);
3464 delete_scheduled_jump (delay_insn);
3465 continue;
3468 /* See if this is an unconditional jump around a single insn which is
3469 identical to the one in its delay slot. In this case, we can just
3470 delete the branch and the insn in its delay slot. */
3471 if (next && NONJUMP_INSN_P (next)
3472 && label_before_next_insn (next, insn) == target_label
3473 && simplejump_p (insn)
3474 && XVECLEN (pat, 0) == 2
3475 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3477 delete_related_insns (insn);
3478 continue;
3481 /* See if this jump (with its delay slots) conditionally branches
3482 around an unconditional jump (without delay slots). If so, invert
3483 this jump and point it to the target of the second jump. We cannot
3484 do this for annulled jumps, though. Again, don't convert a jump to
3485 a RETURN here. */
3486 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3487 && any_condjump_p (delay_insn)
3488 && next && simplejump_or_return_p (next)
3489 && next_active_insn (target_label) == next_active_insn (next)
3490 && no_labels_between_p (insn, next))
3492 rtx label = JUMP_LABEL (next);
3493 rtx old_label = JUMP_LABEL (delay_insn);
3495 if (ANY_RETURN_P (label))
3496 label = find_end_label (label);
3498 /* find_end_label can generate a new label. Check this first. */
3499 if (label
3500 && no_labels_between_p (insn, next)
3501 && redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3503 /* Be careful how we do this to avoid deleting code or labels
3504 that are momentarily dead. See similar optimization in
3505 jump.c */
3506 if (old_label)
3507 ++LABEL_NUSES (old_label);
3509 if (invert_jump (delay_insn, label, 1))
3511 int i;
3513 /* Must update the INSN_FROM_TARGET_P bits now that
3514 the branch is reversed, so that mark_target_live_regs
3515 will handle the delay slot insn correctly. */
3516 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3518 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3519 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3522 delete_related_insns (next);
3523 next = insn;
3526 if (old_label && --LABEL_NUSES (old_label) == 0)
3527 delete_related_insns (old_label);
3528 continue;
3532 /* If we own the thread opposite the way this insn branches, see if we
3533 can merge its delay slots with following insns. */
3534 if (INSN_FROM_TARGET_P (pat->insn (1))
3535 && own_thread_p (NEXT_INSN (insn), 0, 1))
3536 try_merge_delay_insns (insn, next)
3537 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3538 && own_thread_p (target_label, target_label, 0))
3539 try_merge_delay_insns (insn, next_active_insn (target_label));
3541 /* If we get here, we haven't deleted INSN. But we may have deleted
3542 NEXT, so recompute it. */
3543 next = next_active_insn (insn);
3548 /* Look for filled jumps to the end of function label. We can try to convert
3549 them into RETURN insns if the insns in the delay slot are valid for the
3550 RETURN as well. */
3552 static void
3553 make_return_insns (rtx_insn *first)
3555 rtx_insn *insn;
3556 rtx_insn *jump_insn;
3557 rtx real_return_label = function_return_label;
3558 rtx real_simple_return_label = function_simple_return_label;
3559 int slots, i;
3561 /* See if there is a RETURN insn in the function other than the one we
3562 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3563 into a RETURN to jump to it. */
3564 for (insn = first; insn; insn = NEXT_INSN (insn))
3565 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3567 rtx t = get_label_before (insn, NULL_RTX);
3568 if (PATTERN (insn) == ret_rtx)
3569 real_return_label = t;
3570 else
3571 real_simple_return_label = t;
3572 break;
3575 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3576 was equal to END_OF_FUNCTION_LABEL. */
3577 if (real_return_label)
3578 LABEL_NUSES (real_return_label)++;
3579 if (real_simple_return_label)
3580 LABEL_NUSES (real_simple_return_label)++;
3582 /* Clear the list of insns to fill so we can use it. */
3583 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3585 for (insn = first; insn; insn = NEXT_INSN (insn))
3587 int flags;
3588 rtx kind, real_label;
3590 /* Only look at filled JUMP_INSNs that go to the end of function
3591 label. */
3592 if (!NONJUMP_INSN_P (insn))
3593 continue;
3595 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3596 continue;
3598 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3600 if (!jump_to_label_p (pat->insn (0)))
3601 continue;
/* Decide which flavor of return (ret vs. simple_return) this jump
   should become, and which real label is the fallback target.  */
3603 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3605 kind = ret_rtx;
3606 real_label = real_return_label;
3608 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3610 kind = simple_return_rtx;
3611 real_label = real_simple_return_label;
3613 else
3614 continue;
3616 jump_insn = pat->insn (0);
3618 /* If we can't make the jump into a RETURN, try to redirect it to the best
3619 RETURN and go on to the next insn. */
3620 if (!reorg_redirect_jump (jump_insn, kind))
3622 /* Make sure redirecting the jump will not invalidate the delay
3623 slot insns. */
3624 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3625 reorg_redirect_jump (jump_insn, real_label);
3626 continue;
3629 /* See if this RETURN can accept the insns current in its delay slot.
3630 It can if it has more or an equal number of slots and the contents
3631 of each is valid. */
3633 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3634 slots = num_delay_slots (jump_insn);
3635 if (slots >= XVECLEN (pat, 0) - 1)
3637 for (i = 1; i < XVECLEN (pat, 0); i++)
3638 if (! (
3639 #ifdef ANNUL_IFFALSE_SLOTS
3640 (INSN_ANNULLED_BRANCH_P (jump_insn)
3641 && INSN_FROM_TARGET_P (pat->insn (i)))
3642 ? eligible_for_annul_false (jump_insn, i - 1,
3643 pat->insn (i), flags) :
3644 #endif
3645 #ifdef ANNUL_IFTRUE_SLOTS
3646 (INSN_ANNULLED_BRANCH_P (jump_insn)
3647 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3648 ? eligible_for_annul_true (jump_insn, i - 1,
3649 pat->insn (i), flags) :
3650 #endif
3651 eligible_for_delay (jump_insn, i - 1,
3652 pat->insn (i), flags)))
3653 break;
3655 else
3656 i = 0;
/* If the eligibility loop completed, every delay insn is valid for
   the RETURN, so the conversion done above stands as-is.  */
3658 if (i == XVECLEN (pat, 0))
3659 continue;
3661 /* We have to do something with this insn. If it is an unconditional
3662 RETURN, delete the SEQUENCE and output the individual insns,
3663 followed by the RETURN. Then set things up so we try to find
3664 insns for its delay slots, if it needs some. */
3665 if (ANY_RETURN_P (PATTERN (jump_insn)))
3667 rtx_insn *prev = PREV_INSN (insn);
3669 delete_related_insns (insn);
3670 for (i = 1; i < XVECLEN (pat, 0); i++)
3671 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3673 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3674 emit_barrier_after (insn);
3676 if (slots)
3677 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3679 else
3680 /* It is probably more efficient to keep this with its current
3681 delay slot as a branch to a RETURN. */
3682 reorg_redirect_jump (jump_insn, real_label);
3685 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3686 new delay slots we have created. */
3687 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3688 delete_related_insns (real_return_label)
3689 if (real_simple_return_label != NULL_RTX
3690 && --LABEL_NUSES (real_simple_return_label) == 0)
3691 delete_related_insns (real_simple_return_label);
3693 fill_simple_delay_slots (1);
3694 fill_simple_delay_slots (0);
3697 /* Try to find insns to place in delay slots. */
3699 static void
3700 dbr_schedule (rtx_insn *first)
3702 rtx_insn *insn, *next, *epilogue_insn = 0;
3703 int i;
3704 bool need_return_insns;
3706 /* If the current function has no insns other than the prologue and
3707 epilogue, then do not try to fill any delay slots. */
3708 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3709 return;
3711 /* Find the highest INSN_UID and allocate and initialize our map from
3712 INSN_UID's to position in code. */
3713 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3715 if (INSN_UID (insn) > max_uid)
3716 max_uid = INSN_UID (insn);
3717 if (NOTE_P (insn)
3718 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3719 epilogue_insn = insn;
3722 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3723 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3724 uid_to_ruid[INSN_UID (insn)] = i;
3726 /* Initialize the list of insns that need filling. */
3727 if (unfilled_firstobj == 0)
3729 gcc_obstack_init (&unfilled_slots_obstack);
3730 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
/* Reset per-insn reorg flags and collect every insn with delay slots
   into the unfilled-slots worklist.  */
3733 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3735 rtx target;
3737 /* Skip vector tables. We can't get attributes for them. */
3738 if (JUMP_TABLE_DATA_P (insn))
3739 continue;
3741 if (JUMP_P (insn))
3742 INSN_ANNULLED_BRANCH_P (insn) = 0;
3743 INSN_FROM_TARGET_P (insn) = 0;
3745 if (num_delay_slots (insn) > 0)
3746 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3748 /* Ensure all jumps go to the last of a set of consecutive labels. */
3749 if (JUMP_P (insn)
3750 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3751 && !ANY_RETURN_P (JUMP_LABEL (insn))
3752 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3753 != JUMP_LABEL (insn)))
3754 redirect_jump (insn, target, 1);
3757 init_resource_info (epilogue_insn);
3759 /* Show we haven't computed an end-of-function label yet. */
3760 function_return_label = function_simple_return_label = NULL;
3762 /* Initialize the statistics for this function. */
3763 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3764 memset (num_filled_delays, 0, sizeof num_filled_delays);
3766 /* Now do the delay slot filling. Try everything twice in case earlier
3767 changes make more slots fillable. */
3769 for (reorg_pass_number = 0;
3770 reorg_pass_number < MAX_REORG_PASSES;
3771 reorg_pass_number++)
3773 fill_simple_delay_slots (1);
3774 fill_simple_delay_slots (0);
3775 fill_eager_delay_slots ();
3776 relax_delay_slots (first);
3779 /* If we made an end of function label, indicate that it is now
3780 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3781 If it is now unused, delete it. */
3782 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3783 delete_related_insns (function_return_label);
3784 if (function_simple_return_label
3785 && --LABEL_NUSES (function_simple_return_label) == 0)
3786 delete_related_insns (function_simple_return_label);
/* Convert jumps to the end-of-function label into real RETURN insns
   where the target supports them.  */
3788 need_return_insns = false;
3789 #ifdef HAVE_return
3790 need_return_insns |= HAVE_return && function_return_label != 0;
3791 #endif
3792 #ifdef HAVE_simple_return
3793 need_return_insns |= HAVE_simple_return && function_simple_return_label != 0;
3794 #endif
3795 if (need_return_insns)
3796 make_return_insns (first);
3798 /* Delete any USE insns made by update_block; subsequent passes don't need
3799 them or know how to deal with them. */
3800 for (insn = first; insn; insn = next)
3802 next = NEXT_INSN (insn);
3804 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3805 && INSN_P (XEXP (PATTERN (insn), 0)))
3806 next = delete_related_insns (insn);
3809 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3811 /* It is not clear why the line below is needed, but it does seem to be. */
3812 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
/* The remainder of the function only emits dump-file statistics:
   per-pass histograms of how many delay slots were filled.  */
3814 if (dump_file)
3816 int i, j, need_comma;
3817 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3818 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3820 for (reorg_pass_number = 0;
3821 reorg_pass_number < MAX_REORG_PASSES;
3822 reorg_pass_number++)
3824 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3825 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3827 need_comma = 0;
3828 fprintf (dump_file, ";; Reorg function #%d\n", i);
3830 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3831 num_insns_needing_delays[i][reorg_pass_number]);
3833 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3834 if (num_filled_delays[i][j][reorg_pass_number])
3836 if (need_comma)
3837 fprintf (dump_file, ", ");
3838 need_comma = 1;
3839 fprintf (dump_file, "%d got %d delays",
3840 num_filled_delays[i][j][reorg_pass_number], j);
3842 fprintf (dump_file, "\n");
3845 memset (total_delay_slots, 0, sizeof total_delay_slots);
3846 memset (total_annul_slots, 0, sizeof total_annul_slots);
3847 for (insn = first; insn; insn = NEXT_INSN (insn))
3849 if (! insn->deleted ()
3850 && NONJUMP_INSN_P (insn)
3851 && GET_CODE (PATTERN (insn)) != USE
3852 && GET_CODE (PATTERN (insn)) != CLOBBER)
3854 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3856 rtx control;
3857 j = XVECLEN (PATTERN (insn), 0) - 1;
3858 if (j > MAX_DELAY_HISTOGRAM)
3859 j = MAX_DELAY_HISTOGRAM;
3860 control = XVECEXP (PATTERN (insn), 0, 0);
3861 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3862 total_annul_slots[j]++;
3863 else
3864 total_delay_slots[j]++;
3866 else if (num_delay_slots (insn) > 0)
3867 total_delay_slots[0]++;
3870 fprintf (dump_file, ";; Reorg totals: ");
3871 need_comma = 0;
3872 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3874 if (total_delay_slots[j])
3876 if (need_comma)
3877 fprintf (dump_file, ", ");
3878 need_comma = 1;
3879 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3882 fprintf (dump_file, "\n");
3883 #if defined (ANNUL_IFTRUE_SLOTS) || defined (ANNUL_IFFALSE_SLOTS)
3884 fprintf (dump_file, ";; Reorg annuls: ");
3885 need_comma = 0;
3886 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3888 if (total_annul_slots[j])
3890 if (need_comma)
3891 fprintf (dump_file, ", ");
3892 need_comma = 1;
3893 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3896 fprintf (dump_file, "\n");
3897 #endif
3898 fprintf (dump_file, "\n");
3901 if (!sibling_labels.is_empty ())
3903 update_alignments (sibling_labels);
3904 sibling_labels.release ();
3907 free_resource_info ();
3908 free (uid_to_ruid);
3909 crtl->dbr_scheduled_p = true;
3911 #endif /* DELAY_SLOTS */
3913 /* Run delay slot optimization. */
3914 static unsigned int
3915 rest_of_handle_delay_slots (void)
3917 #ifdef DELAY_SLOTS
/* Delay-slot filling is a no-op on targets without DELAY_SLOTS.  */
3918 dbr_schedule (get_insns ());
3919 #endif
3920 return 0;
3923 namespace {
/* Pass registration data for the delayed-branch ("dbr") reorg pass.  */
3925 const pass_data pass_data_delay_slots =
3927 RTL_PASS, /* type */
3928 "dbr", /* name */
3929 OPTGROUP_NONE, /* optinfo_flags */
3930 TV_DBR_SCHED, /* tv_id */
3931 0, /* properties_required */
3932 0, /* properties_provided */
3933 0, /* properties_destroyed */
3934 0, /* todo_flags_start */
3935 0, /* todo_flags_finish */
/* The delay-slot-filling pass object; execute () simply runs
   rest_of_handle_delay_slots, and gate () is defined out of line below.  */
3938 class pass_delay_slots : public rtl_opt_pass
3940 public:
3941 pass_delay_slots (gcc::context *ctxt)
3942 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3945 /* opt_pass methods: */
3946 virtual bool gate (function *);
3947 virtual unsigned int execute (function *)
3949 return rest_of_handle_delay_slots ();
3952 }; // class pass_delay_slots
/* Only run when the target has delay slots, -fdelayed-branch is on with
   optimization enabled, and the pass has not already been run.  */
3954 bool
3955 pass_delay_slots::gate (function *)
3957 #ifdef DELAY_SLOTS
3958 /* At -O0 dataflow info isn't updated after RA. */
3959 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3960 #else
3961 return 0;
3962 #endif
3965 } // anon namespace
/* Factory used by the pass manager to instantiate the "dbr" pass.  */
3967 rtl_opt_pass *
3968 make_pass_delay_slots (gcc::context *ctxt)
3970 return new pass_delay_slots (ctxt);
3973 /* Machine dependent reorg pass. */
3975 namespace {
/* Pass registration data for the target-specific ("mach") reorg pass.  */
3977 const pass_data pass_data_machine_reorg =
3979 RTL_PASS, /* type */
3980 "mach", /* name */
3981 OPTGROUP_NONE, /* optinfo_flags */
3982 TV_MACH_DEP, /* tv_id */
3983 0, /* properties_required */
3984 0, /* properties_provided */
3985 0, /* properties_destroyed */
3986 0, /* todo_flags_start */
3987 0, /* todo_flags_finish */
/* Machine-dependent reorg pass: gated on the target providing a
   machine_dependent_reorg hook, which execute () simply invokes.  */
3990 class pass_machine_reorg : public rtl_opt_pass
3992 public:
3993 pass_machine_reorg (gcc::context *ctxt)
3994 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3997 /* opt_pass methods: */
3998 virtual bool gate (function *)
4000 return targetm.machine_dependent_reorg != 0;
4003 virtual unsigned int execute (function *)
4005 targetm.machine_dependent_reorg ();
4006 return 0;
4009 }; // class pass_machine_reorg
4011 } // anon namespace
/* Factory used by the pass manager to instantiate the "mach" pass.  */
4013 rtl_opt_pass *
4014 make_pass_machine_reorg (gcc::context *ctxt)
4016 return new pass_machine_reorg (ctxt);