1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Instruction reorganization pass.
25 This pass runs after register allocation and final jump
26 optimization. It should be the last pass to run before peephole.
27 It serves primarily to fill delay slots of insns, typically branch
28 and call insns. Other insns typically involve more complicated
29 interactions of data dependencies and resource constraints, and
30 are better handled by scheduling before register allocation (by the
31 function `schedule_insns').
33 The Branch Penalty is the number of extra cycles that are needed to
34 execute a branch insn. On an ideal machine, branches take a single
35 cycle, and the Branch Penalty is 0. Several RISC machines approach
36 branch delays differently:
38 The MIPS and AMD 29000 have a single branch delay slot. Most insns
39 (except other branches) can be used to fill this slot. When the
40 slot is filled, two insns execute in two cycles, reducing the
41 branch penalty to zero.
43 The Motorola 88000 conditionally exposes its branch delay slot,
44 so code is shorter when it is turned off, but will run faster
45 when useful insns are scheduled there.
47 The IBM ROMP has two forms of branch and call insns, both with and
48 without a delay slot. Much like the 88k, insns not using the delay
49 slot can be shorter (2 bytes vs. 4 bytes), but will run slower.
51 The SPARC always has a branch delay slot, but its effects can be
52 annulled when the branch is not taken. This means that failing to
53 find other sources of insns, we can hoist an insn from the branch
54 target that would only be safe to execute knowing that the branch
55 is taken.
57 The HP-PA always has a branch delay slot. For unconditional branches
58 its effects can be annulled when the branch is taken. The effects
59 of the delay slot in a conditional branch can be nullified for forward
60 taken branches, or for untaken backward branches. This means
61 we can hoist insns from the fall-through path for forward branches or
62 steal insns from the target of backward branches.
64 The TMS320C3x and C4x have three branch delay slots. When the three
65 slots are filled, the branch penalty is zero. Most insns can fill the
66 delay slots except jump insns.
68 Three techniques for filling delay slots have been implemented so far:
70 (1) `fill_simple_delay_slots' is the simplest, most efficient way
71 to fill delay slots. This pass first looks for insns which come
72 from before the branch and which are safe to execute after the
73 branch. Then it searches after the insn requiring delay slots or,
74 in the case of a branch, for insns that are after the point at
75 which the branch merges into the fallthrough code, if such a point
76 exists. When such insns are found, the branch penalty decreases
77 and no code expansion takes place.
79 (2) `fill_eager_delay_slots' is more complicated: it is used for
80 scheduling conditional jumps, or for scheduling jumps which cannot
81 be filled using (1). A machine need not have annulled jumps to use
82 this strategy, but it helps (by keeping more options open).
83 `fill_eager_delay_slots' tries to guess the direction the branch
84 will go; if it guesses right 100% of the time, it can reduce the
85 branch penalty as much as `fill_simple_delay_slots' does. If it
86 guesses wrong 100% of the time, it might as well schedule nops (or
87 on the m88k, unexpose the branch slot). When
88 `fill_eager_delay_slots' takes insns from the fall-through path of
89 the jump, usually there is no code expansion; when it takes insns
90 from the branch target, there is code expansion if it is not the
91 only way to reach that target.
93 (3) `relax_delay_slots' uses a set of rules to simplify code that
94 has been reorganized by (1) and (2). It finds cases where
95 a conditional test can be eliminated, jumps can be threaded, extra
96 insns can be eliminated, etc. It is the job of (1) and (2) to do a
97 good job of scheduling locally; `relax_delay_slots' takes care of
98 making the various individual schedules work well together. It is
99 especially tuned to handle the control flow interactions of branch
100 insns. It does nothing for insns with delay slots that do not
101 branch.
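     As an illustration of technique (1), consider a machine with a single
   non-annulling branch delay slot (a sketch only; the register names and
   MIPS-like syntax below are invented):

         (before reorg)                  (after fill_simple_delay_slots)
             add  r1,r2,r3                   beq  r4,r0,L2
             beq  r4,r0,L2                   add  r1,r2,r3
             ...                             ...

   The add does not feed the branch condition, so it can be moved into the
   delay slot: the branch penalty drops to zero and no code is duplicated.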
103 On machines that use CC0, we are very conservative. We will not make
104 a copy of an insn involving CC0 since we want to maintain a 1-1
105 correspondence between the insn that sets CC0 and the insn that uses it. The insns are
106 allowed to be separated by placing an insn that sets CC0 (but not an insn
107 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
108 delay slot. In that case, we point each insn at the other with REG_CC_USER
109 and REG_CC_SETTER notes. Note that these restrictions affect very few
110 machines because most RISC machines with delay slots will not use CC0
111 (the RT is the only known exception at this point).
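     For example, if an insn I1 that sets CC0 is placed in the delay slot of
   a branch I2 that uses CC0, then I1 gets a REG_CC_USER note pointing at I2
   and I2 gets a REG_CC_SETTER note pointing back at I1, so later passes can
   still pair the setter with its user.  (I1 and I2 are names used only for
   this illustration.)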
113 Not yet implemented:
115 The Acorn Risc Machine can conditionally execute most insns, so
116 it is profitable to move single insns into a position to execute
117 based on the condition code of the previous insn.
119 The HP-PA can conditionally nullify insns, providing a similar
120 effect to the ARM, differing mostly in which insn is "in charge". */
122 #include "config.h"
123 #include "system.h"
124 #include "toplev.h"
125 #include "rtl.h"
126 #include "expr.h"
127 #include "insn-config.h"
128 #include "conditions.h"
129 #include "hard-reg-set.h"
130 #include "basic-block.h"
131 #include "regs.h"
132 #include "insn-flags.h"
133 #include "recog.h"
134 #include "flags.h"
135 #include "output.h"
136 #include "obstack.h"
137 #include "insn-attr.h"
138 #include "resource.h"
141 #ifdef DELAY_SLOTS
143 #define obstack_chunk_alloc xmalloc
144 #define obstack_chunk_free free
146 #ifndef ANNUL_IFTRUE_SLOTS
147 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
148 #endif
149 #ifndef ANNUL_IFFALSE_SLOTS
150 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
151 #endif
153 /* Insns which have delay slots that have not yet been filled. */
155 static struct obstack unfilled_slots_obstack;
156 static rtx *unfilled_firstobj;
158 /* Define macros to refer to the first and last slot containing unfilled
159 insns. These are used because the list may move and its address
160 should be recomputed at each use. */
162 #define unfilled_slots_base \
163 ((rtx *) obstack_base (&unfilled_slots_obstack))
165 #define unfilled_slots_next \
166 ((rtx *) obstack_next_free (&unfilled_slots_obstack))
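/* A minimal sketch, not used by the pass itself, of how the list of insns
   still needing delay slots is walked.  The obstack may grow and move, so
   the base and end must be recomputed through the macros above at each use.
   (Illustrative only; the function name is invented.)  */
#if 0
static void
walk_unfilled_slots_sketch ()
{
  rtx *pinsn;

  for (pinsn = unfilled_slots_base; pinsn < unfilled_slots_next; pinsn++)
    if (*pinsn != 0)
      {
	/* *pinsn is an INSN, JUMP_INSN or CALL_INSN that still needs its
	   delay slots filled.  */
      }
}
#endif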
168 /* Points to the label before the end of the function. */
169 static rtx end_of_function_label;
171 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
172 not always monotonically increase. */
173 static int *uid_to_ruid;
175 /* Highest valid index in `uid_to_ruid'. */
176 static int max_uid;
178 static int stop_search_p PROTO((rtx, int));
179 static int resource_conflicts_p PROTO((struct resources *,
180 struct resources *));
181 static int insn_references_resource_p PROTO((rtx, struct resources *, int));
182 static int insn_sets_resource_p PROTO((rtx, struct resources *, int));
183 static rtx find_end_label PROTO((void));
184 static rtx emit_delay_sequence PROTO((rtx, rtx, int));
185 static rtx add_to_delay_list PROTO((rtx, rtx));
186 static rtx delete_from_delay_slot PROTO((rtx));
187 static void delete_scheduled_jump PROTO((rtx));
188 static void note_delay_statistics PROTO((int, int));
189 static rtx optimize_skip PROTO((rtx));
190 static int get_jump_flags PROTO((rtx, rtx));
191 static int rare_destination PROTO((rtx));
192 static int mostly_true_jump PROTO((rtx, rtx));
193 static rtx get_branch_condition PROTO((rtx, rtx));
194 static int condition_dominates_p PROTO((rtx, rtx));
195 static int redirect_with_delay_slots_safe_p PROTO ((rtx, rtx, rtx));
196 static int redirect_with_delay_list_safe_p PROTO ((rtx, rtx, rtx));
197 static int check_annul_list_true_false PROTO ((int, rtx));
198 static rtx steal_delay_list_from_target PROTO((rtx, rtx, rtx, rtx,
199 struct resources *,
200 struct resources *,
201 struct resources *,
202 int, int *, int *, rtx *));
203 static rtx steal_delay_list_from_fallthrough PROTO((rtx, rtx, rtx, rtx,
204 struct resources *,
205 struct resources *,
206 struct resources *,
207 int, int *, int *));
208 static void try_merge_delay_insns PROTO((rtx, rtx));
209 static rtx redundant_insn PROTO((rtx, rtx, rtx));
210 static int own_thread_p PROTO((rtx, rtx, int));
211 static void update_block PROTO((rtx, rtx));
212 static int reorg_redirect_jump PROTO((rtx, rtx));
213 static void update_reg_dead_notes PROTO((rtx, rtx));
214 static void fix_reg_dead_note PROTO((rtx, rtx));
215 static void update_reg_unused_notes PROTO((rtx, rtx));
216 static void fill_simple_delay_slots PROTO((int));
217 static rtx fill_slots_from_thread PROTO((rtx, rtx, rtx, rtx, int, int,
218 int, int, int *, rtx));
219 static void fill_eager_delay_slots PROTO((void));
220 static void relax_delay_slots PROTO((rtx));
221 static void make_return_insns PROTO((rtx));
223 /* Return TRUE if this insn should stop the search for insns to fill delay
224 slots. LABELS_P indicates that labels should terminate the search.
225 In all cases, jumps terminate the search. */
227 static int
228 stop_search_p (insn, labels_p)
229 rtx insn;
230 int labels_p;
232 if (insn == 0)
233 return 1;
235 switch (GET_CODE (insn))
237 case NOTE:
238 case CALL_INSN:
239 return 0;
241 case CODE_LABEL:
242 return labels_p;
244 case JUMP_INSN:
245 case BARRIER:
246 return 1;
248 case INSN:
249 /* OK unless it contains a delay slot or is an `asm' insn of some type.
250 We don't know anything about these. */
251 return (GET_CODE (PATTERN (insn)) == SEQUENCE
252 || GET_CODE (PATTERN (insn)) == ASM_INPUT
253 || asm_noperands (PATTERN (insn)) >= 0);
255 default:
256 abort ();
260 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
261 resource set contains a volatile memory reference. Otherwise, return FALSE. */
263 static int
264 resource_conflicts_p (res1, res2)
265 struct resources *res1, *res2;
267 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
268 || (res1->unch_memory && res2->unch_memory)
269 || res1->volatil || res2->volatil)
270 return 1;
272 #ifdef HARD_REG_SET
273 return (res1->regs & res2->regs) != HARD_CONST (0);
274 #else
276 int i;
278 for (i = 0; i < HARD_REG_SET_LONGS; i++)
279 if ((res1->regs[i] & res2->regs[i]) != 0)
280 return 1;
281 return 0;
283 #endif
286 /* Return TRUE if any resource marked in RES, a `struct resources', is
287 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, resources used by a
288 routine called by INSN are counted as well.
290 We compute this by computing all the resources referenced by INSN and
291 seeing if this conflicts with RES. It might be faster to directly check
292 ourselves, and this is the way it used to work, but it means duplicating
293 a large block of complex code. */
295 static int
296 insn_references_resource_p (insn, res, include_delayed_effects)
297 register rtx insn;
298 register struct resources *res;
299 int include_delayed_effects;
301 struct resources insn_res;
303 CLEAR_RESOURCE (&insn_res);
304 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
305 return resource_conflicts_p (&insn_res, res);
308 /* Return TRUE if INSN modifies resources that are marked in RES.
309 INCLUDE_DELAYED_EFFECTS is set if the effects of a called routine should be
310 included. CC0 is only modified if it is explicitly set; see comments
311 in front of mark_set_resources for details. */
313 static int
314 insn_sets_resource_p (insn, res, include_delayed_effects)
315 register rtx insn;
316 register struct resources *res;
317 int include_delayed_effects;
319 struct resources insn_sets;
321 CLEAR_RESOURCE (&insn_sets);
322 mark_set_resources (insn, &insn_sets, 0, include_delayed_effects);
323 return resource_conflicts_p (&insn_sets, res);
326 /* Find a label at the end of the function or before a RETURN. If there is
327 none, make one. */
329 static rtx
330 find_end_label ()
332 rtx insn;
334 /* If we found one previously, return it. */
335 if (end_of_function_label)
336 return end_of_function_label;
338 /* Otherwise, see if there is a label at the end of the function. If there
339 is, it must be that RETURN insns aren't needed, so that is our return
340 label and we don't have to do anything else. */
342 insn = get_last_insn ();
343 while (GET_CODE (insn) == NOTE
344 || (GET_CODE (insn) == INSN
345 && (GET_CODE (PATTERN (insn)) == USE
346 || GET_CODE (PATTERN (insn)) == CLOBBER)))
347 insn = PREV_INSN (insn);
349 /* When a target threads its epilogue we might already have a
350 suitable return insn. If so put a label before it for the
351 end_of_function_label. */
352 if (GET_CODE (insn) == BARRIER
353 && GET_CODE (PREV_INSN (insn)) == JUMP_INSN
354 && GET_CODE (PATTERN (PREV_INSN (insn))) == RETURN)
356 rtx temp = PREV_INSN (PREV_INSN (insn));
357 end_of_function_label = gen_label_rtx ();
358 LABEL_NUSES (end_of_function_label) = 0;
360 /* Put the label before any USE insns that may precede the RETURN insn. */
361 while (GET_CODE (temp) == USE)
362 temp = PREV_INSN (temp);
364 emit_label_after (end_of_function_label, temp);
367 else if (GET_CODE (insn) == CODE_LABEL)
368 end_of_function_label = insn;
369 else
371 /* Otherwise, make a new label and emit a RETURN and BARRIER,
372 if needed. */
373 end_of_function_label = gen_label_rtx ();
374 LABEL_NUSES (end_of_function_label) = 0;
375 emit_label (end_of_function_label);
376 #ifdef HAVE_return
377 if (HAVE_return)
379 /* The return we make may have delay slots too. */
380 rtx insn = gen_return ();
381 insn = emit_jump_insn (insn);
382 emit_barrier ();
383 if (num_delay_slots (insn) > 0)
384 obstack_ptr_grow (&unfilled_slots_obstack, insn);
386 #endif
389 /* Show one additional use for this label so it won't go away until
390 we are done. */
391 ++LABEL_NUSES (end_of_function_label);
393 return end_of_function_label;
396 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
397 the pattern of INSN with the SEQUENCE.
399 Chain the insns so that NEXT_INSN of each insn in the sequence points to
400 the next and NEXT_INSN of the last insn in the sequence points to
401 the first insn after the sequence. Similarly for PREV_INSN. This makes
402 it easier to scan all insns.
404 Returns the SEQUENCE that replaces INSN. */
406 static rtx
407 emit_delay_sequence (insn, list, length)
408 rtx insn;
409 rtx list;
410 int length;
412 register int i = 1;
413 register rtx li;
414 int had_barrier = 0;
416 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
417 rtvec seqv = rtvec_alloc (length + 1);
418 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
419 rtx seq_insn = make_insn_raw (seq);
420 rtx first = get_insns ();
421 rtx last = get_last_insn ();
423 /* Make a copy of the insn having delay slots. */
424 rtx delay_insn = copy_rtx (insn);
426 /* If INSN is followed by a BARRIER, delete the BARRIER since it will only
427 confuse further processing. Update LAST in case it was the last insn.
428 We will put the BARRIER back in later. */
429 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == BARRIER)
431 delete_insn (NEXT_INSN (insn));
432 last = get_last_insn ();
433 had_barrier = 1;
436 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
437 NEXT_INSN (seq_insn) = NEXT_INSN (insn);
438 PREV_INSN (seq_insn) = PREV_INSN (insn);
440 if (insn != last)
441 PREV_INSN (NEXT_INSN (seq_insn)) = seq_insn;
443 if (insn != first)
444 NEXT_INSN (PREV_INSN (seq_insn)) = seq_insn;
446 /* Note the calls to set_new_first_and_last_insn must occur after
447 SEQ_INSN has been completely spliced into the insn stream.
449 Otherwise CUR_INSN_UID will get set to an incorrect value because
450 set_new_first_and_last_insn will not find SEQ_INSN in the chain. */
451 if (insn == last)
452 set_new_first_and_last_insn (first, seq_insn);
454 if (insn == first)
455 set_new_first_and_last_insn (seq_insn, last);
457 /* Build our SEQUENCE and rebuild the insn chain. */
458 XVECEXP (seq, 0, 0) = delay_insn;
459 INSN_DELETED_P (delay_insn) = 0;
460 PREV_INSN (delay_insn) = PREV_INSN (seq_insn);
462 for (li = list; li; li = XEXP (li, 1), i++)
464 rtx tem = XEXP (li, 0);
465 rtx note;
467 /* Show that this copy of the insn isn't deleted. */
468 INSN_DELETED_P (tem) = 0;
470 XVECEXP (seq, 0, i) = tem;
471 PREV_INSN (tem) = XVECEXP (seq, 0, i - 1);
472 NEXT_INSN (XVECEXP (seq, 0, i - 1)) = tem;
474 /* Remove any REG_DEAD notes because we can't rely on them now
475 that the insn has been moved. */
476 for (note = REG_NOTES (tem); note; note = XEXP (note, 1))
477 if (REG_NOTE_KIND (note) == REG_DEAD)
478 XEXP (note, 0) = const0_rtx;
481 NEXT_INSN (XVECEXP (seq, 0, length)) = NEXT_INSN (seq_insn);
483 /* If the previous insn is a SEQUENCE, update the NEXT_INSN pointer on the
484 last insn in that SEQUENCE to point to us. Similarly for the first
485 insn in the following insn if it is a SEQUENCE. */
487 if (PREV_INSN (seq_insn) && GET_CODE (PREV_INSN (seq_insn)) == INSN
488 && GET_CODE (PATTERN (PREV_INSN (seq_insn))) == SEQUENCE)
489 NEXT_INSN (XVECEXP (PATTERN (PREV_INSN (seq_insn)), 0,
490 XVECLEN (PATTERN (PREV_INSN (seq_insn)), 0) - 1))
491 = seq_insn;
493 if (NEXT_INSN (seq_insn) && GET_CODE (NEXT_INSN (seq_insn)) == INSN
494 && GET_CODE (PATTERN (NEXT_INSN (seq_insn))) == SEQUENCE)
495 PREV_INSN (XVECEXP (PATTERN (NEXT_INSN (seq_insn)), 0, 0)) = seq_insn;
497 /* If there used to be a BARRIER, put it back. */
498 if (had_barrier)
499 emit_barrier_after (seq_insn);
501 if (i != length + 1)
502 abort ();
504 return seq_insn;
507 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
508 be in the order in which the insns are to be executed. */
510 static rtx
511 add_to_delay_list (insn, delay_list)
512 rtx insn;
513 rtx delay_list;
515 /* If we have an empty list, just make a new list element. If
516 INSN has its block number recorded, clear it since we may
517 be moving the insn to a new block. */
519 if (delay_list == 0)
521 clear_hashed_info_for_insn (insn);
522 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
525 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
526 list. */
527 XEXP (delay_list, 1) = add_to_delay_list (insn, XEXP (delay_list, 1));
529 return delay_list;
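/* Usage sketch (illustrative only; SLOT1_INSN and SLOT2_INSN are made-up
   names): the list is built in execution order, so the insn added first
   stays at the head of the returned INSN_LIST chain:

	delay_list = add_to_delay_list (slot1_insn, NULL_RTX);
	delay_list = add_to_delay_list (slot2_insn, delay_list);

   This is the idiom used by the slot-filling routines below to accumulate
   the insns chosen for each delay slot.  */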
532 /* Delete INSN from the delay slot of the insn that it is in, which may
533 produce an insn with no delay slots. Return the new insn. */
535 static rtx
536 delete_from_delay_slot (insn)
537 rtx insn;
539 rtx trial, seq_insn, seq, prev;
540 rtx delay_list = 0;
541 int i;
543 /* We first must find the insn containing the SEQUENCE with INSN in its
544 delay slot. Do this by finding an insn, TRIAL, where
545 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
547 for (trial = insn;
548 PREV_INSN (NEXT_INSN (trial)) == trial;
549 trial = NEXT_INSN (trial))
552 seq_insn = PREV_INSN (NEXT_INSN (trial));
553 seq = PATTERN (seq_insn);
555 /* Create a delay list consisting of all the insns other than the one
556 we are deleting (unless we were the only one). */
557 if (XVECLEN (seq, 0) > 2)
558 for (i = 1; i < XVECLEN (seq, 0); i++)
559 if (XVECEXP (seq, 0, i) != insn)
560 delay_list = add_to_delay_list (XVECEXP (seq, 0, i), delay_list);
562 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
563 list, and rebuild the delay list if non-empty. */
564 prev = PREV_INSN (seq_insn);
565 trial = XVECEXP (seq, 0, 0);
566 delete_insn (seq_insn);
567 add_insn_after (trial, prev);
569 if (GET_CODE (trial) == JUMP_INSN
570 && (simplejump_p (trial) || GET_CODE (PATTERN (trial)) == RETURN))
571 emit_barrier_after (trial);
573 /* If there are any delay insns, re-emit them. Otherwise clear the
574 annul flag. */
575 if (delay_list)
576 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
577 else
578 INSN_ANNULLED_BRANCH_P (trial) = 0;
580 INSN_FROM_TARGET_P (insn) = 0;
582 /* Show we need to fill this insn again. */
583 obstack_ptr_grow (&unfilled_slots_obstack, trial);
585 return trial;
588 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
589 the insn that sets CC0 for it and delete it too. */
591 static void
592 delete_scheduled_jump (insn)
593 rtx insn;
595 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
596 delete the insn that sets the condition code, but it is hard to find it.
597 Since this case is rare anyway, don't bother trying; there would likely
598 be other insns that became dead anyway, which we wouldn't know to
599 delete. */
601 #ifdef HAVE_cc0
602 if (reg_mentioned_p (cc0_rtx, insn))
604 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
606 /* If a reg-note was found, it points to the insn that sets CC0. This
607 insn is in the delay list of some other insn. So delete it from
608 the delay list it was in. */
609 if (note)
611 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
612 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
613 delete_from_delay_slot (XEXP (note, 0));
615 else
617 /* The insn setting CC0 is our previous insn, but it may be in
618 a delay slot. It will be the last insn in the delay slot, if
619 it is. */
620 rtx trial = previous_insn (insn);
621 if (GET_CODE (trial) == NOTE)
622 trial = prev_nonnote_insn (trial);
623 if (sets_cc0_p (PATTERN (trial)) != 1
624 || FIND_REG_INC_NOTE (trial, 0))
625 return;
626 if (PREV_INSN (NEXT_INSN (trial)) == trial)
627 delete_insn (trial);
628 else
629 delete_from_delay_slot (trial);
632 #endif
634 delete_insn (insn);
637 /* Counters for delay-slot filling. */
639 #define NUM_REORG_FUNCTIONS 2
640 #define MAX_DELAY_HISTOGRAM 3
641 #define MAX_REORG_PASSES 2
643 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
645 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
647 static int reorg_pass_number;
649 static void
650 note_delay_statistics (slots_filled, index)
651 int slots_filled, index;
653 num_insns_needing_delays[index][reorg_pass_number]++;
654 if (slots_filled > MAX_DELAY_HISTOGRAM)
655 slots_filled = MAX_DELAY_HISTOGRAM;
656 num_filled_delays[index][slots_filled][reorg_pass_number]++;
659 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
661 /* Optimize the following cases:
663 1. When a conditional branch skips over only one instruction,
664 use an annulling branch and put that insn in the delay slot.
665 Use either a branch that annuls when the condition is true or
666 invert the test with a branch that annuls when the condition is
667 false. This saves insns, since otherwise we must copy an insn
668 from the L1 target.
670          (orig)                (skip)                 (otherwise)
671          Bcc.n L1              Bcc',a L1              Bcc,a L1'
672          insn                  insn                   insn2
673       L1:                   L1:                    L1:
674          insn2                 insn2                  insn2
675          insn3                 insn3                  L1':
676                                                       insn3
678 2. When a conditional branch skips over only one instruction,
679 and after that, it unconditionally branches somewhere else,
680 perform a similar optimization. This saves executing the
681 second branch in the case where the inverted condition is true.
683          Bcc.n L1              Bcc',a L2
684          insn                  insn
685       L1:                   L1:
686          Bra L2                Bra L2
688 INSN is a JUMP_INSN.
690 This should be expanded to skip over N insns, where N is the number
691 of delay slots required. */
693 static rtx
694 optimize_skip (insn)
695 register rtx insn;
697 register rtx trial = next_nonnote_insn (insn);
698 rtx next_trial = next_active_insn (trial);
699 rtx delay_list = 0;
700 rtx target_label;
701 int flags;
703 flags = get_jump_flags (insn, JUMP_LABEL (insn));
705 if (trial == 0
706 || GET_CODE (trial) != INSN
707 || GET_CODE (PATTERN (trial)) == SEQUENCE
708 || recog_memoized (trial) < 0
709 || (! eligible_for_annul_false (insn, 0, trial, flags)
710 && ! eligible_for_annul_true (insn, 0, trial, flags)))
711 return 0;
713 /* There are two cases where we are just executing one insn (we assume
714 here that a branch requires only one insn; this should be generalized
715 at some point): Where the branch goes around a single insn or where
716 we have one insn followed by a branch to the same label we branch to.
717 In both of these cases, inverting the jump and annulling the delay
718 slot give the same effect in fewer insns. */
719 if ((next_trial == next_active_insn (JUMP_LABEL (insn)))
720 || (next_trial != 0
721 && GET_CODE (next_trial) == JUMP_INSN
722 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)
723 && (simplejump_p (next_trial)
724 || GET_CODE (PATTERN (next_trial)) == RETURN)))
726 if (eligible_for_annul_false (insn, 0, trial, flags))
728 if (invert_jump (insn, JUMP_LABEL (insn)))
729 INSN_FROM_TARGET_P (trial) = 1;
730 else if (! eligible_for_annul_true (insn, 0, trial, flags))
731 return 0;
734 delay_list = add_to_delay_list (trial, NULL_RTX);
735 next_trial = next_active_insn (trial);
736 update_block (trial, trial);
737 delete_insn (trial);
739 /* Also, if we are targeting an unconditional
740 branch, thread our jump to the target of that branch. Don't
741 change this into a RETURN here, because it may not accept what
742 we have in the delay slot. We'll fix this up later. */
743 if (next_trial && GET_CODE (next_trial) == JUMP_INSN
744 && (simplejump_p (next_trial)
745 || GET_CODE (PATTERN (next_trial)) == RETURN))
747 target_label = JUMP_LABEL (next_trial);
748 if (target_label == 0)
749 target_label = find_end_label ();
751 /* Recompute the flags based on TARGET_LABEL since threading
752 the jump to TARGET_LABEL may change the direction of the
753 jump (which may change the circumstances in which the
754 delay slot is nullified). */
755 flags = get_jump_flags (insn, target_label);
756 if (eligible_for_annul_true (insn, 0, trial, flags))
757 reorg_redirect_jump (insn, target_label);
760 INSN_ANNULLED_BRANCH_P (insn) = 1;
763 return delay_list;
765 #endif
768 /* Encode and return branch direction and prediction information for
769 INSN assuming it will jump to LABEL.
771 Insns that are not conditional branches return no direction information and
772 are predicted as very likely taken. */
774 static int
775 get_jump_flags (insn, label)
776 rtx insn, label;
778 int flags;
780 /* get_jump_flags can be passed any insn with delay slots; these may
781 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
782 direction information, and only if they are conditional jumps.
784 If LABEL is zero, then there is no way to determine the branch
785 direction. */
786 if (GET_CODE (insn) == JUMP_INSN
787 && (condjump_p (insn) || condjump_in_parallel_p (insn))
788 && INSN_UID (insn) <= max_uid
789 && label != 0
790 && INSN_UID (label) <= max_uid)
791 flags
792 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
793 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
794 /* No valid direction information. */
795 else
796 flags = 0;
798 /* If INSN is a conditional branch, call mostly_true_jump to
799 determine the branch prediction.
801 Insns that are not conditional branches are predicted as very likely taken. */
802 if (GET_CODE (insn) == JUMP_INSN
803 && (condjump_p (insn) || condjump_in_parallel_p (insn)))
805 int prediction;
807 prediction = mostly_true_jump (insn, get_branch_condition (insn, label));
808 switch (prediction)
810 case 2:
811 flags |= (ATTR_FLAG_very_likely | ATTR_FLAG_likely);
812 break;
813 case 1:
814 flags |= ATTR_FLAG_likely;
815 break;
816 case 0:
817 flags |= ATTR_FLAG_unlikely;
818 break;
819 case -1:
820 flags |= (ATTR_FLAG_very_unlikely | ATTR_FLAG_unlikely);
821 break;
823 default:
824 abort();
827 else
828 flags |= (ATTR_FLAG_very_likely | ATTR_FLAG_likely);
830 return flags;
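/* For instance (illustrative): a conditional branch whose target label lies
   later in the insn stream and is mildly predicted taken gets
   ATTR_FLAG_forward | ATTR_FLAG_likely, while a CALL_INSN, which carries no
   branch direction, gets ATTR_FLAG_very_likely | ATTR_FLAG_likely.  */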
833 /* Return 1 if INSN is a destination that will be branched to rarely (the
834 return point of a function); return 2 if INSN will be branched to very
835 rarely (a call to a function that doesn't return). Otherwise,
836 return 0. */
838 static int
839 rare_destination (insn)
840 rtx insn;
842 int jump_count = 0;
843 rtx next;
845 for (; insn; insn = next)
847 if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
848 insn = XVECEXP (PATTERN (insn), 0, 0);
850 next = NEXT_INSN (insn);
852 switch (GET_CODE (insn))
854 case CODE_LABEL:
855 return 0;
856 case BARRIER:
857 /* A BARRIER can either be after a JUMP_INSN or a CALL_INSN. We
858 don't scan past JUMP_INSNs, so any barrier we find here must
859 have been after a CALL_INSN and hence mean the call doesn't
860 return. */
861 return 2;
862 case JUMP_INSN:
863 if (GET_CODE (PATTERN (insn)) == RETURN)
864 return 1;
865 else if (simplejump_p (insn)
866 && jump_count++ < 10)
867 next = JUMP_LABEL (insn);
868 else
869 return 0;
871 default:
872 break;
876 /* If we got here it means we hit the end of the function. So this
877 is an unlikely destination. */
879 return 1;
882 /* Return truth value of the statement that this branch
883 is mostly taken. If we think that the branch is extremely likely
884 to be taken, we return 2. If the branch is slightly more likely to be
885 taken, return 1. If the branch is slightly less likely to be taken,
886 return 0, and if the branch is highly unlikely to be taken, return -1.
888 CONDITION, if non-zero, is the condition that JUMP_INSN is testing. */
890 static int
891 mostly_true_jump (jump_insn, condition)
892 rtx jump_insn, condition;
894 rtx target_label = JUMP_LABEL (jump_insn);
895 rtx insn;
896 int rare_dest = rare_destination (target_label);
897 int rare_fallthrough = rare_destination (NEXT_INSN (jump_insn));
899 /* If branch probabilities are available, then use that number since it
900 always gives a correct answer. */
901 if (flag_branch_probabilities)
903 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
904 if (note)
906 int prob = XINT (note, 0);
908 if (prob >= REG_BR_PROB_BASE * 9 / 10)
909 return 2;
910 else if (prob >= REG_BR_PROB_BASE / 2)
911 return 1;
912 else if (prob >= REG_BR_PROB_BASE / 10)
913 return 0;
914 else
915 return -1;
919 /* If this is a branch outside a loop, it is highly unlikely to be taken. */
920 if (GET_CODE (PATTERN (jump_insn)) == SET
921 && GET_CODE (SET_SRC (PATTERN (jump_insn))) == IF_THEN_ELSE
922 && ((GET_CODE (XEXP (SET_SRC (PATTERN (jump_insn)), 1)) == LABEL_REF
923 && LABEL_OUTSIDE_LOOP_P (XEXP (SET_SRC (PATTERN (jump_insn)), 1)))
924 || (GET_CODE (XEXP (SET_SRC (PATTERN (jump_insn)), 2)) == LABEL_REF
925 && LABEL_OUTSIDE_LOOP_P (XEXP (SET_SRC (PATTERN (jump_insn)), 2)))))
926 return -1;
928 if (target_label)
930 /* If this is the test of a loop, it is very likely true. We scan
931 backwards from the target label. If we find a NOTE_INSN_LOOP_BEG
932 before the next real insn, we assume the branch is to the top of
933 the loop. */
934 for (insn = PREV_INSN (target_label);
935 insn && GET_CODE (insn) == NOTE;
936 insn = PREV_INSN (insn))
937 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
938 return 2;
940 /* If this is a jump to the test of a loop, it is likely true. We scan
941 forwards from the target label. If we find a NOTE_INSN_LOOP_VTOP
942 before the next real insn, we assume the branch is to the loop branch
943 test. */
944 for (insn = NEXT_INSN (target_label);
945 insn && GET_CODE (insn) == NOTE;
946 insn = NEXT_INSN (insn))
947 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_VTOP)
948 return 1;
951 /* Look at the relative rarities of the fallthrough and destination. If
952 they differ, we can predict the branch that way. */
954 switch (rare_fallthrough - rare_dest)
956 case -2:
957 return -1;
958 case -1:
959 return 0;
960 case 0:
961 break;
962 case 1:
963 return 1;
964 case 2:
965 return 2;
968 /* If we couldn't figure out what this jump was, assume it won't be
969 taken. This should be rare. */
970 if (condition == 0)
971 return 0;
973 /* EQ tests are usually false and NE tests are usually true. Also,
974 most quantities are positive, so we can make the appropriate guesses
975 about signed comparisons against zero. */
976 switch (GET_CODE (condition))
978 case CONST_INT:
979 /* Unconditional branch. */
980 return 1;
981 case EQ:
982 return 0;
983 case NE:
984 return 1;
985 case LE:
986 case LT:
987 if (XEXP (condition, 1) == const0_rtx)
988 return 0;
989 break;
990 case GE:
991 case GT:
992 if (XEXP (condition, 1) == const0_rtx)
993 return 1;
994 break;
996 default:
997 break;
1000 /* Predict that backward branches are usually taken and forward branches usually are not. If
1001 we don't know whether this is forward or backward, assume the branch
1002 will be taken, since most are. */
1003 return (target_label == 0 || INSN_UID (jump_insn) > max_uid
1004 || INSN_UID (target_label) > max_uid
1005 || (uid_to_ruid[INSN_UID (jump_insn)]
1006 > uid_to_ruid[INSN_UID (target_label)]));
1009 /* Return the condition under which INSN will branch to TARGET. If TARGET
1010 is zero, return the condition under which INSN will return. If INSN is
1011 an unconditional branch, return const_true_rtx. If INSN isn't a simple
1012 type of jump, or it doesn't go to TARGET, return 0. */
1014 static rtx
1015 get_branch_condition (insn, target)
1016 rtx insn;
1017 rtx target;
1019 rtx pat = PATTERN (insn);
1020 rtx src;
1022 if (condjump_in_parallel_p (insn))
1023 pat = XVECEXP (pat, 0, 0);
1025 if (GET_CODE (pat) == RETURN)
1026 return target == 0 ? const_true_rtx : 0;
1028 else if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
1029 return 0;
1031 src = SET_SRC (pat);
1032 if (GET_CODE (src) == LABEL_REF && XEXP (src, 0) == target)
1033 return const_true_rtx;
1035 else if (GET_CODE (src) == IF_THEN_ELSE
1036 && ((target == 0 && GET_CODE (XEXP (src, 1)) == RETURN)
1037 || (GET_CODE (XEXP (src, 1)) == LABEL_REF
1038 && XEXP (XEXP (src, 1), 0) == target))
1039 && XEXP (src, 2) == pc_rtx)
1040 return XEXP (src, 0);
1042 else if (GET_CODE (src) == IF_THEN_ELSE
1043 && ((target == 0 && GET_CODE (XEXP (src, 2)) == RETURN)
1044 || (GET_CODE (XEXP (src, 2)) == LABEL_REF
1045 && XEXP (XEXP (src, 2), 0) == target))
1046 && XEXP (src, 1) == pc_rtx)
1047 return gen_rtx_fmt_ee (reverse_condition (GET_CODE (XEXP (src, 0))),
1048 GET_MODE (XEXP (src, 0)),
1049 XEXP (XEXP (src, 0), 0), XEXP (XEXP (src, 0), 1));
1051 return 0;
1054 /* Return non-zero if CONDITION is more strict than the condition of
1055 INSN, i.e., if INSN will always branch if CONDITION is true. */
1057 static int
1058 condition_dominates_p (condition, insn)
1059 rtx condition;
1060 rtx insn;
1062 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
1063 enum rtx_code code = GET_CODE (condition);
1064 enum rtx_code other_code;
1066 if (rtx_equal_p (condition, other_condition)
1067 || other_condition == const_true_rtx)
1068 return 1;
1070 else if (condition == const_true_rtx || other_condition == 0)
1071 return 0;
1073 other_code = GET_CODE (other_condition);
1074 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
1075 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
1076 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
1077 return 0;
1079 return comparison_dominates_p (code, other_code);
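/* Example (illustrative): if CONDITION is (eq r1 r2) and INSN branches when
   (le r1 r2) holds on the same operands, then whenever CONDITION is true
   INSN is certain to branch; comparison_dominates_p (EQ, LE) is non-zero and
   this function returns 1.  */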
1082 /* Return non-zero if redirecting JUMP to NEWLABEL does not invalidate
1083 any insns already in the delay slot of JUMP. */
1085 static int
1086 redirect_with_delay_slots_safe_p (jump, newlabel, seq)
1087 rtx jump, newlabel, seq;
1089 int flags, i;
1090 rtx pat = PATTERN (seq);
1092 /* Make sure all the delay slots of this jump would still
1093 be valid after threading the jump. If they are still
1094 valid, then return non-zero. */
1096 flags = get_jump_flags (jump, newlabel);
1097 for (i = 1; i < XVECLEN (pat, 0); i++)
1098 if (! (
1099 #ifdef ANNUL_IFFALSE_SLOTS
1100 (INSN_ANNULLED_BRANCH_P (jump)
1101 && INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1102 ? eligible_for_annul_false (jump, i - 1,
1103 XVECEXP (pat, 0, i), flags) :
1104 #endif
1105 #ifdef ANNUL_IFTRUE_SLOTS
1106 (INSN_ANNULLED_BRANCH_P (jump)
1107 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1108 ? eligible_for_annul_true (jump, i - 1,
1109 XVECEXP (pat, 0, i), flags) :
1110 #endif
1111 eligible_for_delay (jump, i - 1, XVECEXP (pat, 0, i), flags)))
1112 break;
1114 return (i == XVECLEN (pat, 0));
1117 /* Return non-zero if redirecting JUMP to NEWLABEL does not invalidate
1118 any insns we wish to place in the delay slot of JUMP. */
1120 static int
1121 redirect_with_delay_list_safe_p (jump, newlabel, delay_list)
1122 rtx jump, newlabel, delay_list;
1124 int flags, i;
1125 rtx li;
1127 /* Make sure all the insns in DELAY_LIST would still be
1128 valid after threading the jump. If they are still
1129 valid, then return non-zero. */
1131 flags = get_jump_flags (jump, newlabel);
1132 for (li = delay_list, i = 0; li; li = XEXP (li, 1), i++)
1133 if (! (
1134 #ifdef ANNUL_IFFALSE_SLOTS
1135 (INSN_ANNULLED_BRANCH_P (jump)
1136 && INSN_FROM_TARGET_P (XEXP (li, 0)))
1137 ? eligible_for_annul_false (jump, i, XEXP (li, 0), flags) :
1138 #endif
1139 #ifdef ANNUL_IFTRUE_SLOTS
1140 (INSN_ANNULLED_BRANCH_P (jump)
1141 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
1142 ? eligible_for_annul_true (jump, i, XEXP (li, 0), flags) :
1143 #endif
1144 eligible_for_delay (jump, i, XEXP (li, 0), flags)))
1145 break;
1147 return (li == NULL);
1150 /* DELAY_LIST is a list of insns that have already been placed into delay
1151 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1152 If not, return 0; otherwise return 1. */
1154 static int
1155 check_annul_list_true_false (annul_true_p, delay_list)
1156 int annul_true_p;
1157 rtx delay_list;
1159 rtx temp;
1161 if (delay_list)
1163 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1165 rtx trial = XEXP (temp, 0);
1167 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1168 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1169 return 0;
1173 return 1;
1177 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1178 the condition tested by INSN is CONDITION and the resources shown in
1179 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1180 from SEQ's delay list, in addition to whatever insns it may execute
1181 (in DELAY_LIST). SETS and NEEDED denote resources already set and
1182 needed while searching for delay slot insns. Return the concatenated
1183 delay list if possible, otherwise, return 0.
1185 SLOTS_TO_FILL is the total number of slots required by INSN, and
1186 PSLOTS_FILLED points to the number filled so far (also the number of
1187 insns in DELAY_LIST). It is updated with the number that have been
1188 filled from the SEQUENCE, if any.
1190 PANNUL_P points to a non-zero value if we already know that we need
1191 to annul INSN. If this routine determines that annulling is needed,
1192 it may set that value non-zero.
1194 PNEW_THREAD points to a location that is to receive the place at which
1195 execution should continue. */
1197 static rtx
1198 steal_delay_list_from_target (insn, condition, seq, delay_list,
1199 sets, needed, other_needed,
1200 slots_to_fill, pslots_filled, pannul_p,
1201 pnew_thread)
1202 rtx insn, condition;
1203 rtx seq;
1204 rtx delay_list;
1205 struct resources *sets, *needed, *other_needed;
1206 int slots_to_fill;
1207 int *pslots_filled;
1208 int *pannul_p;
1209 rtx *pnew_thread;
1211 rtx temp;
1212 int slots_remaining = slots_to_fill - *pslots_filled;
1213 int total_slots_filled = *pslots_filled;
1214 rtx new_delay_list = 0;
1215 int must_annul = *pannul_p;
1216 int used_annul = 0;
1217 int i;
1218 struct resources cc_set;
1220 /* We can't do anything if there are more delay slots in SEQ than we
1221 can handle, or if we don't know that it will be a taken branch.
1222 We know that it will be a taken branch if it is either an unconditional
1223 branch or a conditional branch with a stricter branch condition.
1225 Also, exit if the branch has more than one set, since then it is computing
1226 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1227 ??? It may be possible to move other sets into INSN in addition to
1228 moving the instructions in the delay slots.
1230 We can not steal the delay list if one of the instructions in the
1231 current delay_list modifies the condition codes and the jump in the
1232 sequence is a conditional jump. We can not do this because we can
1233 not change the direction of the jump because the condition codes
1234 will affect the direction of the jump in the sequence. */
1236 CLEAR_RESOURCE (&cc_set);
1237 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1239 rtx trial = XEXP (temp, 0);
1241 mark_set_resources (trial, &cc_set, 0, 1);
1242 if (insn_references_resource_p (XVECEXP (seq , 0, 0), &cc_set, 0))
1243 return delay_list;
1246 if (XVECLEN (seq, 0) - 1 > slots_remaining
1247 || ! condition_dominates_p (condition, XVECEXP (seq, 0, 0))
1248 || ! single_set (XVECEXP (seq, 0, 0)))
1249 return delay_list;
1251 for (i = 1; i < XVECLEN (seq, 0); i++)
1253 rtx trial = XVECEXP (seq, 0, i);
1254 int flags;
1256 if (insn_references_resource_p (trial, sets, 0)
1257 || insn_sets_resource_p (trial, needed, 0)
1258 || insn_sets_resource_p (trial, sets, 0)
1259 #ifdef HAVE_cc0
1260 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1261 delay list. */
1262 || find_reg_note (trial, REG_CC_USER, NULL_RTX)
1263 #endif
1264 /* If TRIAL is from the fallthrough code of an annulled branch insn
1265 in SEQ, we cannot use it. */
1266 || (INSN_ANNULLED_BRANCH_P (XVECEXP (seq, 0, 0))
1267 && ! INSN_FROM_TARGET_P (trial)))
1268 return delay_list;
1270 /* If this insn was already done (usually in a previous delay slot),
1271 pretend we put it in our delay slot. */
1272 if (redundant_insn (trial, insn, new_delay_list))
1273 continue;
1275 /* We will end up re-vectoring this branch, so compute flags
1276 based on jumping to the new label. */
1277 flags = get_jump_flags (insn, JUMP_LABEL (XVECEXP (seq, 0, 0)));
1279 if (! must_annul
1280 && ((condition == const_true_rtx
1281 || (! insn_sets_resource_p (trial, other_needed, 0)
1282 && ! may_trap_p (PATTERN (trial)))))
1283 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1284 : (must_annul || (delay_list == NULL && new_delay_list == NULL))
1285 && (must_annul = 1,
1286 check_annul_list_true_false (0, delay_list)
1287 && check_annul_list_true_false (0, new_delay_list)
1288 && eligible_for_annul_false (insn, total_slots_filled,
1289 trial, flags)))
1291 if (must_annul)
1292 used_annul = 1;
1293 temp = copy_rtx (trial);
1294 INSN_FROM_TARGET_P (temp) = 1;
1295 new_delay_list = add_to_delay_list (temp, new_delay_list);
1296 total_slots_filled++;
1298 if (--slots_remaining == 0)
1299 break;
1301 else
1302 return delay_list;
1305 /* Show the place to which we will be branching. */
1306 *pnew_thread = next_active_insn (JUMP_LABEL (XVECEXP (seq, 0, 0)));
1308 /* Add any new insns to the delay list and update the count of the
1309 number of slots filled. */
1310 *pslots_filled = total_slots_filled;
1311 if (used_annul)
1312 *pannul_p = 1;
1314 if (delay_list == 0)
1315 return new_delay_list;
1317 for (temp = new_delay_list; temp; temp = XEXP (temp, 1))
1318 delay_list = add_to_delay_list (XEXP (temp, 0), delay_list);
1320 return delay_list;
1323 /* Similar to steal_delay_list_from_target except that SEQ is on the
1324 fallthrough path of INSN. Here we only do something if the delay insn
1325 of SEQ is an unconditional branch. In that case we steal its delay slot
1326 for INSN since unconditional branches are much easier to fill. */
1328 static rtx
1329 steal_delay_list_from_fallthrough (insn, condition, seq,
1330 delay_list, sets, needed, other_needed,
1331 slots_to_fill, pslots_filled, pannul_p)
1332 rtx insn, condition;
1333 rtx seq;
1334 rtx delay_list;
1335 struct resources *sets, *needed, *other_needed;
1336 int slots_to_fill;
1337 int *pslots_filled;
1338 int *pannul_p;
1340 int i;
1341 int flags;
1342 int must_annul = *pannul_p;
1343 int used_annul = 0;
1345 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1347 /* We can't do anything if SEQ's delay insn isn't an
1348 unconditional branch. */
1350 if (! simplejump_p (XVECEXP (seq, 0, 0))
1351 && GET_CODE (PATTERN (XVECEXP (seq, 0, 0))) != RETURN)
1352 return delay_list;
1354 for (i = 1; i < XVECLEN (seq, 0); i++)
1356 rtx trial = XVECEXP (seq, 0, i);
1358 /* If TRIAL sets CC0, stealing it will move it too far from the use
1359 of CC0. */
1360 if (insn_references_resource_p (trial, sets, 0)
1361 || insn_sets_resource_p (trial, needed, 0)
1362 || insn_sets_resource_p (trial, sets, 0)
1363 #ifdef HAVE_cc0
1364 || sets_cc0_p (PATTERN (trial))
1365 #endif
1368 break;
1370 /* If this insn was already done, we don't need it. */
1371 if (redundant_insn (trial, insn, delay_list))
1373 delete_from_delay_slot (trial);
1374 continue;
1377 if (! must_annul
1378 && ((condition == const_true_rtx
1379 || (! insn_sets_resource_p (trial, other_needed, 0)
1380 && ! may_trap_p (PATTERN (trial)))))
1381 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1382 : (must_annul || delay_list == NULL) && (must_annul = 1,
1383 check_annul_list_true_false (1, delay_list)
1384 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1386 if (must_annul)
1387 used_annul = 1;
1388 delete_from_delay_slot (trial);
1389 delay_list = add_to_delay_list (trial, delay_list);
1391 if (++(*pslots_filled) == slots_to_fill)
1392 break;
1394 else
1395 break;
1398 if (used_annul)
1399 *pannul_p = 1;
1400 return delay_list;
1404 /* Try merging insns starting at THREAD which match exactly the insns in
1405 INSN's delay list.
1407 If all insns were matched and the insn was previously annulling, the
1408 annul bit will be cleared.
1410 For each insn that is merged, if the branch is or will be non-annulling,
1411 we delete the merged insn. */
1413 static void
1414 try_merge_delay_insns (insn, thread)
1415 rtx insn, thread;
1417 rtx trial, next_trial;
1418 rtx delay_insn = XVECEXP (PATTERN (insn), 0, 0);
1419 int annul_p = INSN_ANNULLED_BRANCH_P (delay_insn);
1420 int slot_number = 1;
1421 int num_slots = XVECLEN (PATTERN (insn), 0);
1422 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1423 struct resources set, needed;
1424 rtx merged_insns = 0;
1425 int i;
1426 int flags;
1428 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1430 CLEAR_RESOURCE (&needed);
1431 CLEAR_RESOURCE (&set);
1433 /* If this is not an annulling branch, take into account anything needed in
1434 INSN's delay slot. This prevents two increments from being incorrectly
1435 folded into one. If we are annulling, this would be the correct
1436 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH,
1437 will essentially disable this optimization. This method is somewhat of
1438 a kludge, but I don't see a better way.) */
1439 if (! annul_p)
1440 for (i = 1 ; i < num_slots ; i++)
1441 if (XVECEXP (PATTERN (insn), 0, i))
1442 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed, 1);
1444 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1446 rtx pat = PATTERN (trial);
1447 rtx oldtrial = trial;
1449 next_trial = next_nonnote_insn (trial);
1451 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1452 if (GET_CODE (trial) == INSN
1453 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1454 continue;
1456 if (GET_CODE (next_to_match) == GET_CODE (trial)
1457 #ifdef HAVE_cc0
1458 /* We can't share an insn that sets cc0. */
1459 && ! sets_cc0_p (pat)
1460 #endif
1461 && ! insn_references_resource_p (trial, &set, 1)
1462 && ! insn_sets_resource_p (trial, &set, 1)
1463 && ! insn_sets_resource_p (trial, &needed, 1)
1464 && (trial = try_split (pat, trial, 0)) != 0
1465 /* Update next_trial, in case try_split succeeded. */
1466 && (next_trial = next_nonnote_insn (trial))
1467 /* Likewise THREAD. */
1468 && (thread = oldtrial == thread ? trial : thread)
1469 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1470 /* Have to test this condition if annul condition is different
1471 from (and less restrictive than) non-annulling one. */
1472 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1475 if (! annul_p)
1477 update_block (trial, thread);
1478 if (trial == thread)
1479 thread = next_active_insn (thread);
1481 delete_insn (trial);
1482 INSN_FROM_TARGET_P (next_to_match) = 0;
1484 else
1485 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1487 if (++slot_number == num_slots)
1488 break;
1490 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1493 mark_set_resources (trial, &set, 0, 1);
1494 mark_referenced_resources (trial, &needed, 1);
1497 /* See if we stopped on a filled insn. If we did, try to see if its
1498 delay slots match. */
1499 if (slot_number != num_slots
1500 && trial && GET_CODE (trial) == INSN
1501 && GET_CODE (PATTERN (trial)) == SEQUENCE
1502 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0)))
1504 rtx pat = PATTERN (trial);
1505 rtx filled_insn = XVECEXP (pat, 0, 0);
1507 /* Account for resources set/needed by the filled insn. */
1508 mark_set_resources (filled_insn, &set, 0, 1);
1509 mark_referenced_resources (filled_insn, &needed, 1);
1511 for (i = 1; i < XVECLEN (pat, 0); i++)
1513 rtx dtrial = XVECEXP (pat, 0, i);
1515 if (! insn_references_resource_p (dtrial, &set, 1)
1516 && ! insn_sets_resource_p (dtrial, &set, 1)
1517 && ! insn_sets_resource_p (dtrial, &needed, 1)
1518 #ifdef HAVE_cc0
1519 && ! sets_cc0_p (PATTERN (dtrial))
1520 #endif
1521 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1522 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1524 if (! annul_p)
1526 rtx new;
1528 update_block (dtrial, thread);
1529 new = delete_from_delay_slot (dtrial);
1530 if (INSN_DELETED_P (thread))
1531 thread = new;
1532 INSN_FROM_TARGET_P (next_to_match) = 0;
1534 else
1535 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1536 merged_insns);
1538 if (++slot_number == num_slots)
1539 break;
1541 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1543 else
1545 /* Keep track of the set/referenced resources for the delay
1546 slots of any trial insns we encounter. */
1547 mark_set_resources (dtrial, &set, 0, 1);
1548 mark_referenced_resources (dtrial, &needed, 1);
1553 /* If all insns in the delay slot have been matched and we were previously
1554 annulling the branch, we need not do so any more. In that case delete all the
1555 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1556 the delay list so that we know that it isn't only being used at the
1557 target. */
1558 if (slot_number == num_slots && annul_p)
1560 for (; merged_insns; merged_insns = XEXP (merged_insns, 1))
1562 if (GET_MODE (merged_insns) == SImode)
1564 rtx new;
1566 update_block (XEXP (merged_insns, 0), thread);
1567 new = delete_from_delay_slot (XEXP (merged_insns, 0));
1568 if (INSN_DELETED_P (thread))
1569 thread = new;
1571 else
1573 update_block (XEXP (merged_insns, 0), thread);
1574 delete_insn (XEXP (merged_insns, 0));
1578 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1580 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1581 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1585 /* See if INSN is redundant with an insn in front of TARGET. Often this
1586 is called when INSN is a candidate for a delay slot of TARGET.
1587 DELAY_LIST is a list of insns that will be placed in delay slots of TARGET in front
1588 of INSN. Often INSN will be redundant with an insn in a delay slot of
1589 some previous insn. This happens when we have a series of branches to the
1590 same label; in that case the first insn at the target might want to go
1591 into each of the delay slots.
1593 If we are not careful, this routine can take up a significant fraction
1594 of the total compilation time (4%), but only wins rarely. Hence we
1595 speed this routine up by making two passes. The first pass goes back
1596 until it hits a label and sees if it finds an insn with an identical
1597 pattern. Only in this (relatively rare) event does it check for
1598 data conflicts.
1600 We do not split insns we encounter. This could cause us not to find a
1601 redundant insn, but the cost of splitting seems greater than the possible
1602 gain in rare cases. */
1604 static rtx
1605 redundant_insn (insn, target, delay_list)
1606 rtx insn;
1607 rtx target;
1608 rtx delay_list;
1610 rtx target_main = target;
1611 rtx ipat = PATTERN (insn);
1612 rtx trial, pat;
1613 struct resources needed, set;
1614 int i;
1616 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1617 are allowed to not actually assign to such a register. */
1618 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1619 return 0;
1621 /* Scan backwards looking for a match. */
1622 for (trial = PREV_INSN (target); trial; trial = PREV_INSN (trial))
1624 if (GET_CODE (trial) == CODE_LABEL)
1625 return 0;
1627 if (GET_RTX_CLASS (GET_CODE (trial)) != 'i')
1628 continue;
1630 pat = PATTERN (trial);
1631 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1632 continue;
1634 if (GET_CODE (pat) == SEQUENCE)
1636 /* Stop for a CALL and its delay slots because it is difficult to
1637 track its resource needs correctly. */
1638 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL_INSN)
1639 return 0;
1641 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1642 slots because it is difficult to track its resource needs
1643 correctly. */
1645 #ifdef INSN_SETS_ARE_DELAYED
1646 if (INSN_SETS_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1647 return 0;
1648 #endif
1650 #ifdef INSN_REFERENCES_ARE_DELAYED
1651 if (INSN_REFERENCES_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1652 return 0;
1653 #endif
1655 /* See if any of the insns in the delay slot match, updating
1656 resource requirements as we go. */
1657 for (i = XVECLEN (pat, 0) - 1; i > 0; i--)
1658 if (GET_CODE (XVECEXP (pat, 0, i)) == GET_CODE (insn)
1659 && rtx_equal_p (PATTERN (XVECEXP (pat, 0, i)), ipat)
1660 && ! find_reg_note (XVECEXP (pat, 0, i), REG_UNUSED, NULL_RTX))
1661 break;
1663 /* If found a match, exit this loop early. */
1664 if (i > 0)
1665 break;
1668 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1669 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1670 break;
1673 /* If we didn't find an insn that matches, return 0. */
1674 if (trial == 0)
1675 return 0;
1677 /* See what resources this insn sets and needs. If they overlap, or
1678 if this insn references CC0, it can't be redundant. */
1680 CLEAR_RESOURCE (&needed);
1681 CLEAR_RESOURCE (&set);
1682 mark_set_resources (insn, &set, 0, 1);
1683 mark_referenced_resources (insn, &needed, 1);
1685 /* If TARGET is a SEQUENCE, get the main insn. */
1686 if (GET_CODE (target) == INSN && GET_CODE (PATTERN (target)) == SEQUENCE)
1687 target_main = XVECEXP (PATTERN (target), 0, 0);
1689 if (resource_conflicts_p (&needed, &set)
1690 #ifdef HAVE_cc0
1691 || reg_mentioned_p (cc0_rtx, ipat)
1692 #endif
1693 /* The insn requiring the delay may not set anything needed or set by
1694 INSN. */
1695 || insn_sets_resource_p (target_main, &needed, 1)
1696 || insn_sets_resource_p (target_main, &set, 1))
1697 return 0;
1699 /* Insns we pass may not set either NEEDED or SET, so merge them for
1700 simpler tests. */
1701 needed.memory |= set.memory;
1702 needed.unch_memory |= set.unch_memory;
1703 IOR_HARD_REG_SET (needed.regs, set.regs);
1705 /* This insn isn't redundant if it conflicts with an insn that either is
1706 or will be in a delay slot of TARGET. */
1708 while (delay_list)
1710 if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, 1))
1711 return 0;
1712 delay_list = XEXP (delay_list, 1);
1715 if (GET_CODE (target) == INSN && GET_CODE (PATTERN (target)) == SEQUENCE)
1716 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1717 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed, 1))
1718 return 0;
1720 /* Scan backwards until we reach a label or an insn that uses something
1721 INSN sets or sets something INSN uses or sets. */
1723 for (trial = PREV_INSN (target);
1724 trial && GET_CODE (trial) != CODE_LABEL;
1725 trial = PREV_INSN (trial))
1727 if (GET_CODE (trial) != INSN && GET_CODE (trial) != CALL_INSN
1728 && GET_CODE (trial) != JUMP_INSN)
1729 continue;
1731 pat = PATTERN (trial);
1732 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1733 continue;
1735 if (GET_CODE (pat) == SEQUENCE)
1737 /* If this is a CALL_INSN and its delay slots, it is hard to track
1738 the resource needs properly, so give up. */
1739 if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL_INSN)
1740 return 0;
1742 /* If this is an INSN or JUMP_INSN with delayed effects, it
1743 is hard to track the resource needs properly, so give up. */
1745 #ifdef INSN_SETS_ARE_DELAYED
1746 if (INSN_SETS_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1747 return 0;
1748 #endif
1750 #ifdef INSN_REFERENCES_ARE_DELAYED
1751 if (INSN_REFERENCES_ARE_DELAYED (XVECEXP (pat, 0, 0)))
1752 return 0;
1753 #endif
1755 /* See if any of the insns in the delay slot match, updating
1756 resource requirements as we go. */
1757 for (i = XVECLEN (pat, 0) - 1; i > 0; i--)
1759 rtx candidate = XVECEXP (pat, 0, i);
1761 /* If an insn will be annulled if the branch is false, it isn't
1762 considered as a possible duplicate insn. */
1763 if (rtx_equal_p (PATTERN (candidate), ipat)
1764 && ! (INSN_ANNULLED_BRANCH_P (XVECEXP (pat, 0, 0))
1765 && INSN_FROM_TARGET_P (candidate)))
1767 /* Show that this insn will be used in the sequel. */
1768 INSN_FROM_TARGET_P (candidate) = 0;
1769 return candidate;
1772 /* Unless this is an annulled insn from the target of a branch,
1773 we must stop if it sets anything needed or set by INSN. */
1774 if ((! INSN_ANNULLED_BRANCH_P (XVECEXP (pat, 0, 0))
1775 || ! INSN_FROM_TARGET_P (candidate))
1776 && insn_sets_resource_p (candidate, &needed, 1))
1777 return 0;
1781 /* If the insn requiring the delay slot conflicts with INSN, we
1782 must stop. */
1783 if (insn_sets_resource_p (XVECEXP (pat, 0, 0), &needed, 1))
1784 return 0;
1786 else
1788 /* See if TRIAL is the same as INSN. */
1789 pat = PATTERN (trial);
1790 if (rtx_equal_p (pat, ipat))
1791 return trial;
1793 /* Can't go any further if TRIAL conflicts with INSN. */
1794 if (insn_sets_resource_p (trial, &needed, 1))
1795 return 0;
1799 return 0;
1802 /* Return 1 if THREAD can only be executed in one way. If LABEL is non-zero,
1803 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1804 is non-zero, we are allowed to fall into this thread; otherwise, we are
1805 not.
1807    If LABEL is used more than once or we pass a label other than LABEL before
1808 finding an active insn, we do not own this thread. */
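/* An illustrative example (not from the original source): for a branch to
   label L, the thread starting at L is owned when L is used only by this
   branch (LABEL_NUSES == 1) and, with ALLOW_FALLTHROUGH zero, the code
   before L ends in a BARRIER, so the branch is the only way to reach it.  */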
1810 static int
1811 own_thread_p (thread, label, allow_fallthrough)
1812 rtx thread;
1813 rtx label;
1814 int allow_fallthrough;
1816 rtx active_insn;
1817 rtx insn;
1819 /* We don't own the function end. */
1820 if (thread == 0)
1821 return 0;
1823 /* Get the first active insn, or THREAD, if it is an active insn. */
1824 active_insn = next_active_insn (PREV_INSN (thread));
1826 for (insn = thread; insn != active_insn; insn = NEXT_INSN (insn))
1827 if (GET_CODE (insn) == CODE_LABEL
1828 && (insn != label || LABEL_NUSES (insn) != 1))
1829 return 0;
1831 if (allow_fallthrough)
1832 return 1;
1834 /* Ensure that we reach a BARRIER before any insn or label. */
1835 for (insn = prev_nonnote_insn (thread);
1836 insn == 0 || GET_CODE (insn) != BARRIER;
1837 insn = prev_nonnote_insn (insn))
1838 if (insn == 0
1839 || GET_CODE (insn) == CODE_LABEL
1840 || (GET_CODE (insn) == INSN
1841 && GET_CODE (PATTERN (insn)) != USE
1842 && GET_CODE (PATTERN (insn)) != CLOBBER))
1843 return 0;
1845 return 1;
1848 /* Called when INSN is being moved from a location near the target of a jump.
1849 We leave a marker of the form (use (INSN)) immediately in front
1850 of WHERE for mark_target_live_regs. These markers will be deleted when
1851 reorg finishes.
1853 We used to try to update the live status of registers if WHERE is at
1854 the start of a basic block, but that can't work since we may remove a
1855 BARRIER in relax_delay_slots. */
1857 static void
1858 update_block (insn, where)
1859 rtx insn;
1860 rtx where;
1862 /* Ignore if this was in a delay slot and it came from the target of
1863 a branch. */
1864 if (INSN_FROM_TARGET_P (insn))
1865 return;
1867 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1869 /* INSN might be making a value live in a block where it didn't use to
1870 be. So recompute liveness information for this block. */
1872 incr_ticks_for_insn (insn);
1875 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1876 the basic block containing the jump. */
1878 static int
1879 reorg_redirect_jump (jump, nlabel)
1880 rtx jump;
1881 rtx nlabel;
1883 incr_ticks_for_insn (jump);
1884 return redirect_jump (jump, nlabel);
1887 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1888 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1889 that reference values used in INSN. If we find one, then we move the
1890 REG_DEAD note to INSN.
1892    This is needed to handle the case where a later insn (after INSN) has a
1893 REG_DEAD note for a register used by INSN, and this later insn subsequently
1894 gets moved before a CODE_LABEL because it is a redundant insn. In this
1895 case, mark_target_live_regs may be confused into thinking the register
1896 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
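/* A small illustrative case with a made-up register number: if INSN uses
   (reg 3) and some insn P between INSN and DELAYED_INSN carries a
   (REG_DEAD (reg 3)) note, that note is moved from P onto INSN so the
   death stays recorded at the last use.  */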
1898 static void
1899 update_reg_dead_notes (insn, delayed_insn)
1900 rtx insn, delayed_insn;
1902 rtx p, link, next;
1904 for (p = next_nonnote_insn (insn); p != delayed_insn;
1905 p = next_nonnote_insn (p))
1906 for (link = REG_NOTES (p); link; link = next)
1908 next = XEXP (link, 1);
1910 if (REG_NOTE_KIND (link) != REG_DEAD
1911 || GET_CODE (XEXP (link, 0)) != REG)
1912 continue;
1914 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1916 /* Move the REG_DEAD note from P to INSN. */
1917 remove_note (p, link);
1918 XEXP (link, 1) = REG_NOTES (insn);
1919 REG_NOTES (insn) = link;
1924 /* Called when an insn redundant with start_insn is deleted. If there
1925 is a REG_DEAD note for the target of start_insn between start_insn
1926 and stop_insn, then the REG_DEAD note needs to be deleted since the
1927 value no longer dies there.
1929 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1930 confused into thinking the register is dead. */
1932 static void
1933 fix_reg_dead_note (start_insn, stop_insn)
1934 rtx start_insn, stop_insn;
1936 rtx p, link, next;
1938 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1939 p = next_nonnote_insn (p))
1940 for (link = REG_NOTES (p); link; link = next)
1942 next = XEXP (link, 1);
1944 if (REG_NOTE_KIND (link) != REG_DEAD
1945 || GET_CODE (XEXP (link, 0)) != REG)
1946 continue;
1948 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1950 remove_note (p, link);
1951 return;
1956 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1958 This handles the case of udivmodXi4 instructions which optimize their
1959 output depending on whether any REG_UNUSED notes are present.
1960    We must make sure that INSN calculates as many results as REDUNDANT_INSN
1961 does. */
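/* An illustrative case with a made-up register number: if INSN carries
   (REG_UNUSED (reg 5)) but REDUNDANT_INSN has no REG_UNUSED note for
   register 5, the note is dropped from INSN so INSN keeps computing that
   result too.  */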
1963 static void
1964 update_reg_unused_notes (insn, redundant_insn)
1965 rtx insn, redundant_insn;
1967 rtx link, next;
1969 for (link = REG_NOTES (insn); link; link = next)
1971 next = XEXP (link, 1);
1973 if (REG_NOTE_KIND (link) != REG_UNUSED
1974 || GET_CODE (XEXP (link, 0)) != REG)
1975 continue;
1977 if (! find_regno_note (redundant_insn, REG_UNUSED,
1978 REGNO (XEXP (link, 0))))
1979 remove_note (insn, link);
1983 /* Scan a function looking for insns that need a delay slot and find insns to
1984 put into the delay slot.
1986 NON_JUMPS_P is non-zero if we are to only try to fill non-jump insns (such
1987 as calls). We do these first since we don't want jump insns (that are
1988 easier to fill) to get the only insns that could be used for non-jump insns.
1989 When it is zero, only try to fill JUMP_INSNs.
1991 When slots are filled in this manner, the insns (including the
1992 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1993 it is possible to tell whether a delay slot has really been filled
1994 or not. `final' knows how to deal with this, by communicating
1995 through FINAL_SEQUENCE. */
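/* An illustrative sketch (the exact rtl depends on the target): a branch
   with one filled slot ends up as a single insn whose pattern is a
   SEQUENCE, roughly

	(insn (sequence [(jump_insn (set (pc) (label_ref L)))
			 (insn (set (reg 1) (reg 2)))]))

   so element 0 of the vector is the insn needing the delay and the
   remaining elements are the insns filling its slots.  */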
1997 static void
1998 fill_simple_delay_slots (non_jumps_p)
1999 int non_jumps_p;
2001 register rtx insn, pat, trial, next_trial;
2002 register int i;
2003 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2004 struct resources needed, set;
2005 int slots_to_fill, slots_filled;
2006 rtx delay_list;
2008 for (i = 0; i < num_unfilled_slots; i++)
2010 int flags;
2011 /* Get the next insn to fill. If it has already had any slots assigned,
2012 we can't do anything with it. Maybe we'll improve this later. */
2014 insn = unfilled_slots_base[i];
2015 if (insn == 0
2016 || INSN_DELETED_P (insn)
2017 || (GET_CODE (insn) == INSN
2018 && GET_CODE (PATTERN (insn)) == SEQUENCE)
2019 || (GET_CODE (insn) == JUMP_INSN && non_jumps_p)
2020 || (GET_CODE (insn) != JUMP_INSN && ! non_jumps_p))
2021 continue;
2023 if (GET_CODE (insn) == JUMP_INSN)
2024 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2025 else
2026 flags = get_jump_flags (insn, NULL_RTX);
2027 slots_to_fill = num_delay_slots (insn);
2029       /* Some machine descriptions have defined instructions to have
2030 delay slots only in certain circumstances which may depend on
2031 nearby insns (which change due to reorg's actions).
2033 For example, the PA port normally has delay slots for unconditional
2034 jumps.
2036 However, the PA port claims such jumps do not have a delay slot
2037 if they are immediate successors of certain CALL_INSNs. This
2038 allows the port to favor filling the delay slot of the call with
2039 the unconditional jump. */
2040 if (slots_to_fill == 0)
2041 continue;
2043 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
2044 says how many. After initialization, first try optimizing
2046 call _foo call _foo
2047 nop add %o7,.-L1,%o7
2048 b,a L1
2051 If this case applies, the delay slot of the call is filled with
2052 the unconditional jump. This is done first to avoid having the
2053 delay slot of the call filled in the backward scan. Also, since
2054 the unconditional jump is likely to also have a delay slot, that
2055 insn must exist when it is subsequently scanned.
2057 This is tried on each insn with delay slots as some machines
2058 have insns which perform calls, but are not represented as
2059 CALL_INSNs. */
2061 slots_filled = 0;
2062 delay_list = 0;
2064 if ((trial = next_active_insn (insn))
2065 && GET_CODE (trial) == JUMP_INSN
2066 && simplejump_p (trial)
2067 && eligible_for_delay (insn, slots_filled, trial, flags)
2068 && no_labels_between_p (insn, trial))
2070 rtx *tmp;
2071 slots_filled++;
2072 delay_list = add_to_delay_list (trial, delay_list);
2074 /* TRIAL may have had its delay slot filled, then unfilled. When
2075 the delay slot is unfilled, TRIAL is placed back on the unfilled
2076 slots obstack. Unfortunately, it is placed on the end of the
2077 obstack, not in its original location. Therefore, we must search
2078 from entry i + 1 to the end of the unfilled slots obstack to
2079 try and find TRIAL. */
2080 tmp = &unfilled_slots_base[i + 1];
2081 while (*tmp != trial && tmp != unfilled_slots_next)
2082 tmp++;
2084 /* Remove the unconditional jump from consideration for delay slot
2085 filling and unthread it. */
2086 if (*tmp == trial)
2087 *tmp = 0;
2089 rtx next = NEXT_INSN (trial);
2090 rtx prev = PREV_INSN (trial);
2091 if (prev)
2092 NEXT_INSN (prev) = next;
2093 if (next)
2094 PREV_INSN (next) = prev;
2098 /* Now, scan backwards from the insn to search for a potential
2099 delay-slot candidate. Stop searching when a label or jump is hit.
2101 For each candidate, if it is to go into the delay slot (moved
2102 forward in execution sequence), it must not need or set any resources
2103 that were set by later insns and must not set any resources that
2104 are needed for those insns.
2106 The delay slot insn itself sets resources unless it is a call
2107 (in which case the called routine, not the insn itself, is doing
2108 the setting). */
2110 if (slots_filled < slots_to_fill)
2112 CLEAR_RESOURCE (&needed);
2113 CLEAR_RESOURCE (&set);
2114 mark_set_resources (insn, &set, 0, 0);
2115 mark_referenced_resources (insn, &needed, 0);
2117 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2118 trial = next_trial)
2120 next_trial = prev_nonnote_insn (trial);
2122 /* This must be an INSN or CALL_INSN. */
2123 pat = PATTERN (trial);
2125 	      /* A USE or CLOBBER at this level is just for flow; ignore it. */
2126 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2127 continue;
2129 /* Check for resource conflict first, to avoid unnecessary
2130 splitting. */
2131 if (! insn_references_resource_p (trial, &set, 1)
2132 && ! insn_sets_resource_p (trial, &set, 1)
2133 && ! insn_sets_resource_p (trial, &needed, 1)
2134 #ifdef HAVE_cc0
2135 /* Can't separate set of cc0 from its use. */
2136 && ! (reg_mentioned_p (cc0_rtx, pat)
2137 && ! sets_cc0_p (pat))
2138 #endif
2141 trial = try_split (pat, trial, 1);
2142 next_trial = prev_nonnote_insn (trial);
2143 if (eligible_for_delay (insn, slots_filled, trial, flags))
2145 /* In this case, we are searching backward, so if we
2146 find insns to put on the delay list, we want
2147 to put them at the head, rather than the
2148 tail, of the list. */
2150 update_reg_dead_notes (trial, insn);
2151 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2152 trial, delay_list);
2153 update_block (trial, trial);
2154 delete_insn (trial);
2155 if (slots_to_fill == ++slots_filled)
2156 break;
2157 continue;
2161 mark_set_resources (trial, &set, 0, 1);
2162 mark_referenced_resources (trial, &needed, 1);
2166 /* If all needed slots haven't been filled, we come here. */
2168 /* Try to optimize case of jumping around a single insn. */
2169 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2170 if (slots_filled != slots_to_fill
2171 && delay_list == 0
2172 && GET_CODE (insn) == JUMP_INSN
2173 && (condjump_p (insn) || condjump_in_parallel_p (insn)))
2175 delay_list = optimize_skip (insn);
2176 if (delay_list)
2177 slots_filled += 1;
2179 #endif
2181 /* Try to get insns from beyond the insn needing the delay slot.
2182 	 These insns can neither set nor reference resources set in insns being
2183 skipped, cannot set resources in the insn being skipped, and, if this
2184 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2185 call might not return).
2187 There used to be code which continued past the target label if
2188 we saw all uses of the target label. This code did not work,
2189 because it failed to account for some instructions which were
2190 both annulled and marked as from the target. This can happen as a
2191 result of optimize_skip. Since this code was redundant with
2192 	 fill_eager_delay_slots anyway, it was just deleted. */
2194 if (slots_filled != slots_to_fill
2195 && (GET_CODE (insn) != JUMP_INSN
2196 || ((condjump_p (insn) || condjump_in_parallel_p (insn))
2197 && ! simplejump_p (insn)
2198 && JUMP_LABEL (insn) != 0)))
2200 rtx target = 0;
2201 int maybe_never = 0;
2202 struct resources needed_at_jump;
2204 CLEAR_RESOURCE (&needed);
2205 CLEAR_RESOURCE (&set);
2207 if (GET_CODE (insn) == CALL_INSN)
2209 mark_set_resources (insn, &set, 0, 1);
2210 mark_referenced_resources (insn, &needed, 1);
2211 maybe_never = 1;
2213 else
2215 mark_set_resources (insn, &set, 0, 1);
2216 mark_referenced_resources (insn, &needed, 1);
2217 if (GET_CODE (insn) == JUMP_INSN)
2218 target = JUMP_LABEL (insn);
2221 for (trial = next_nonnote_insn (insn); trial; trial = next_trial)
2223 rtx pat, trial_delay;
2225 next_trial = next_nonnote_insn (trial);
2227 if (GET_CODE (trial) == CODE_LABEL
2228 || GET_CODE (trial) == BARRIER)
2229 break;
2231 /* We must have an INSN, JUMP_INSN, or CALL_INSN. */
2232 pat = PATTERN (trial);
2234 /* Stand-alone USE and CLOBBER are just for flow. */
2235 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2236 continue;
2238 /* If this already has filled delay slots, get the insn needing
2239 the delay slots. */
2240 if (GET_CODE (pat) == SEQUENCE)
2241 trial_delay = XVECEXP (pat, 0, 0);
2242 else
2243 trial_delay = trial;
2245 /* If this is a jump insn to our target, indicate that we have
2246 seen another jump to it. If we aren't handling a conditional
2247 jump, stop our search. Otherwise, compute the needs at its
2248 target and add them to NEEDED. */
2249 if (GET_CODE (trial_delay) == JUMP_INSN)
2251 if (target == 0)
2252 break;
2253 else if (JUMP_LABEL (trial_delay) != target)
2255 rtx ninsn =
2256 next_active_insn (JUMP_LABEL (trial_delay));
2258 mark_target_live_regs (get_insns (), ninsn,
2259 &needed_at_jump);
2260 needed.memory |= needed_at_jump.memory;
2261 needed.unch_memory |= needed_at_jump.unch_memory;
2262 IOR_HARD_REG_SET (needed.regs, needed_at_jump.regs);
2266 /* See if we have a resource problem before we try to
2267 split. */
2268 if (target == 0
2269 && GET_CODE (pat) != SEQUENCE
2270 && ! insn_references_resource_p (trial, &set, 1)
2271 && ! insn_sets_resource_p (trial, &set, 1)
2272 && ! insn_sets_resource_p (trial, &needed, 1)
2273 #ifdef HAVE_cc0
2274 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2275 #endif
2276 && ! (maybe_never && may_trap_p (pat))
2277 && (trial = try_split (pat, trial, 0))
2278 && eligible_for_delay (insn, slots_filled, trial, flags))
2280 next_trial = next_nonnote_insn (trial);
2281 delay_list = add_to_delay_list (trial, delay_list);
2283 #ifdef HAVE_cc0
2284 if (reg_mentioned_p (cc0_rtx, pat))
2285 link_cc0_insns (trial);
2286 #endif
2288 delete_insn (trial);
2289 if (slots_to_fill == ++slots_filled)
2290 break;
2291 continue;
2294 mark_set_resources (trial, &set, 0, 1);
2295 mark_referenced_resources (trial, &needed, 1);
2297 /* Ensure we don't put insns between the setting of cc and the
2298 comparison by moving a setting of cc into an earlier delay
2299 slot since these insns could clobber the condition code. */
2300 set.cc = 1;
2302 /* If this is a call or jump, we might not get here. */
2303 if (GET_CODE (trial_delay) == CALL_INSN
2304 || GET_CODE (trial_delay) == JUMP_INSN)
2305 maybe_never = 1;
2308 /* If there are slots left to fill and our search was stopped by an
2309 unconditional branch, try the insn at the branch target. We can
2310 redirect the branch if it works.
2312 Don't do this if the insn at the branch target is a branch. */
2313 if (slots_to_fill != slots_filled
2314 && trial
2315 && GET_CODE (trial) == JUMP_INSN
2316 && simplejump_p (trial)
2317 && (target == 0 || JUMP_LABEL (trial) == target)
2318 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2319 && ! (GET_CODE (next_trial) == INSN
2320 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2321 && GET_CODE (next_trial) != JUMP_INSN
2322 && ! insn_references_resource_p (next_trial, &set, 1)
2323 && ! insn_sets_resource_p (next_trial, &set, 1)
2324 && ! insn_sets_resource_p (next_trial, &needed, 1)
2325 #ifdef HAVE_cc0
2326 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2327 #endif
2328 && ! (maybe_never && may_trap_p (PATTERN (next_trial)))
2329 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2330 && eligible_for_delay (insn, slots_filled, next_trial, flags))
2332 rtx new_label = next_active_insn (next_trial);
2334 if (new_label != 0)
2335 new_label = get_label_before (new_label);
2336 else
2337 new_label = find_end_label ();
2339 delay_list
2340 = add_to_delay_list (copy_rtx (next_trial), delay_list);
2341 slots_filled++;
2342 reorg_redirect_jump (trial, new_label);
2344 /* If we merged because we both jumped to the same place,
2345 redirect the original insn also. */
2346 if (target)
2347 reorg_redirect_jump (insn, new_label);
2351 /* If this is an unconditional jump, then try to get insns from the
2352 target of the jump. */
2353 if (GET_CODE (insn) == JUMP_INSN
2354 && simplejump_p (insn)
2355 && slots_filled != slots_to_fill)
2356 delay_list
2357 = fill_slots_from_thread (insn, const_true_rtx,
2358 next_active_insn (JUMP_LABEL (insn)),
2359 NULL, 1, 1,
2360 own_thread_p (JUMP_LABEL (insn),
2361 JUMP_LABEL (insn), 0),
2362 slots_to_fill, &slots_filled,
2363 delay_list);
2365 if (delay_list)
2366 unfilled_slots_base[i]
2367 = emit_delay_sequence (insn, delay_list, slots_filled);
2369 if (slots_to_fill == slots_filled)
2370 unfilled_slots_base[i] = 0;
2372 note_delay_statistics (slots_filled, 0);
2375 #ifdef DELAY_SLOTS_FOR_EPILOGUE
2376 /* See if the epilogue needs any delay slots. Try to fill them if so.
2377 The only thing we can do is scan backwards from the end of the
2378 function. If we did this in a previous pass, it is incorrect to do it
2379 again. */
2380 if (current_function_epilogue_delay_list)
2381 return;
2383 slots_to_fill = DELAY_SLOTS_FOR_EPILOGUE;
2384 if (slots_to_fill == 0)
2385 return;
2387 slots_filled = 0;
2388 CLEAR_RESOURCE (&set);
2390 /* The frame pointer and stack pointer are needed at the beginning of
2391 the epilogue, so instructions setting them can not be put in the
2392 epilogue delay slot. However, everything else needed at function
2393 end is safe, so we don't want to use end_of_function_needs here. */
2394 CLEAR_RESOURCE (&needed);
2395 if (frame_pointer_needed)
2397 SET_HARD_REG_BIT (needed.regs, FRAME_POINTER_REGNUM);
2398 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2399 SET_HARD_REG_BIT (needed.regs, HARD_FRAME_POINTER_REGNUM);
2400 #endif
2401 #ifdef EXIT_IGNORE_STACK
2402 if (! EXIT_IGNORE_STACK
2403 || current_function_sp_is_unchanging)
2404 #endif
2405 SET_HARD_REG_BIT (needed.regs, STACK_POINTER_REGNUM);
2407 else
2408 SET_HARD_REG_BIT (needed.regs, STACK_POINTER_REGNUM);
2410 #ifdef EPILOGUE_USES
2411   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2413 if (EPILOGUE_USES (i))
2414 SET_HARD_REG_BIT (needed.regs, i);
2416 #endif
2418 for (trial = get_last_insn (); ! stop_search_p (trial, 1);
2419 trial = PREV_INSN (trial))
2421 if (GET_CODE (trial) == NOTE)
2422 continue;
2423 pat = PATTERN (trial);
2424 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2425 continue;
2427 if (! insn_references_resource_p (trial, &set, 1)
2428 && ! insn_sets_resource_p (trial, &needed, 1)
2429 && ! insn_sets_resource_p (trial, &set, 1)
2430 #ifdef HAVE_cc0
2431 /* Don't want to mess with cc0 here. */
2432 && ! reg_mentioned_p (cc0_rtx, pat)
2433 #endif
2436 trial = try_split (pat, trial, 1);
2437 if (ELIGIBLE_FOR_EPILOGUE_DELAY (trial, slots_filled))
2439 /* Here as well we are searching backward, so put the
2440 insns we find on the head of the list. */
2442 current_function_epilogue_delay_list
2443 = gen_rtx_INSN_LIST (VOIDmode, trial,
2444 current_function_epilogue_delay_list);
2445 mark_end_of_function_resources (trial, 1);
2446 update_block (trial, trial);
2447 delete_insn (trial);
2449 /* Clear deleted bit so final.c will output the insn. */
2450 INSN_DELETED_P (trial) = 0;
2452 if (slots_to_fill == ++slots_filled)
2453 break;
2454 continue;
2458 mark_set_resources (trial, &set, 0, 1);
2459 mark_referenced_resources (trial, &needed, 1);
2462 note_delay_statistics (slots_filled, 0);
2463 #endif
2466 /* Try to find insns to place in delay slots.
2468 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2469 or is an unconditional branch if CONDITION is const_true_rtx.
2470 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2472    THREAD is a flow of control: either the insns to be executed if the
2473    branch is true or those executed if it is false; THREAD_IF_TRUE says which.
2475 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2476 to see if any potential delay slot insns set things needed there.
2478 LIKELY is non-zero if it is extremely likely that the branch will be
2479 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2480 end of a loop back up to the top.
2482 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2483 thread. I.e., it is the fallthrough code of our jump or the target of the
2484 jump when we are the only jump going there.
2486 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2487 case, we can only take insns from the head of the thread for our delay
2488 slot. We then adjust the jump to point after the insns we have taken. */
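/* An illustrative sketch of how this is used (editorial example): for a
   conditional branch "if (cc) goto L", the insns starting at L form one
   thread and the fall-through insns after the branch form the other; the
   caller passes whichever it wants mined for delay insns as THREAD and the
   remaining one as OPPOSITE_THREAD.  */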
2490 static rtx
2491 fill_slots_from_thread (insn, condition, thread, opposite_thread, likely,
2492 thread_if_true, own_thread,
2493 slots_to_fill, pslots_filled, delay_list)
2494 rtx insn;
2495 rtx condition;
2496 rtx thread, opposite_thread;
2497 int likely;
2498 int thread_if_true;
2499 int own_thread;
2500 int slots_to_fill, *pslots_filled;
2501 rtx delay_list;
2503 rtx new_thread;
2504 struct resources opposite_needed, set, needed;
2505 rtx trial;
2506 int lose = 0;
2507 int must_annul = 0;
2508 int flags;
2510 /* Validate our arguments. */
2511 if ((condition == const_true_rtx && ! thread_if_true)
2512 || (! own_thread && ! thread_if_true))
2513 abort ();
2515 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2517   /* If our thread is the end of the subroutine, we can't get any delay
2518 insns from that. */
2519 if (thread == 0)
2520 return delay_list;
2522 /* If this is an unconditional branch, nothing is needed at the
2523 opposite thread. Otherwise, compute what is needed there. */
2524 if (condition == const_true_rtx)
2525 CLEAR_RESOURCE (&opposite_needed);
2526 else
2527 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2529 /* If the insn at THREAD can be split, do it here to avoid having to
2530 update THREAD and NEW_THREAD if it is done in the loop below. Also
2531 initialize NEW_THREAD. */
2533 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2535 /* Scan insns at THREAD. We are looking for an insn that can be removed
2536 from THREAD (it neither sets nor references resources that were set
2537      ahead of it and it doesn't set anything needed by the insns ahead of
2538      it) and that either can be placed in an annulling insn or isn't
2539      needed at OPPOSITE_THREAD.  */
2541 CLEAR_RESOURCE (&needed);
2542 CLEAR_RESOURCE (&set);
2544 /* If we do not own this thread, we must stop as soon as we find
2545 something that we can't put in a delay slot, since all we can do
2546 is branch into THREAD at a later point. Therefore, labels stop
2547 the search if this is not the `true' thread. */
2549 for (trial = thread;
2550 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2551 trial = next_nonnote_insn (trial))
2553 rtx pat, old_trial;
2555 /* If we have passed a label, we no longer own this thread. */
2556 if (GET_CODE (trial) == CODE_LABEL)
2558 own_thread = 0;
2559 continue;
2562 pat = PATTERN (trial);
2563 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2564 continue;
2566 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2567 don't separate or copy insns that set and use CC0. */
2568 if (! insn_references_resource_p (trial, &set, 1)
2569 && ! insn_sets_resource_p (trial, &set, 1)
2570 && ! insn_sets_resource_p (trial, &needed, 1)
2571 #ifdef HAVE_cc0
2572 && ! (reg_mentioned_p (cc0_rtx, pat)
2573 && (! own_thread || ! sets_cc0_p (pat)))
2574 #endif
2577 rtx prior_insn;
2579 /* If TRIAL is redundant with some insn before INSN, we don't
2580 actually need to add it to the delay list; we can merely pretend
2581 we did. */
2582 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2584 fix_reg_dead_note (prior_insn, insn);
2585 if (own_thread)
2587 update_block (trial, thread);
2588 if (trial == thread)
2590 thread = next_active_insn (thread);
2591 if (new_thread == trial)
2592 new_thread = thread;
2595 delete_insn (trial);
2597 else
2599 update_reg_unused_notes (prior_insn, trial);
2600 new_thread = next_active_insn (trial);
2603 continue;
2606 /* There are two ways we can win: If TRIAL doesn't set anything
2607 needed at the opposite thread and can't trap, or if it can
2608 go into an annulled delay slot. */
2609 if (!must_annul
2610 && (condition == const_true_rtx
2611 || (! insn_sets_resource_p (trial, &opposite_needed, 1)
2612 && ! may_trap_p (pat))))
2614 old_trial = trial;
2615 trial = try_split (pat, trial, 0);
2616 if (new_thread == old_trial)
2617 new_thread = trial;
2618 if (thread == old_trial)
2619 thread = trial;
2620 pat = PATTERN (trial);
2621 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2622 goto winner;
2624 else if (0
2625 #ifdef ANNUL_IFTRUE_SLOTS
2626 || ! thread_if_true
2627 #endif
2628 #ifdef ANNUL_IFFALSE_SLOTS
2629 || thread_if_true
2630 #endif
2633 old_trial = trial;
2634 trial = try_split (pat, trial, 0);
2635 if (new_thread == old_trial)
2636 new_thread = trial;
2637 if (thread == old_trial)
2638 thread = trial;
2639 pat = PATTERN (trial);
2640 if ((must_annul || delay_list == NULL) && (thread_if_true
2641 ? check_annul_list_true_false (0, delay_list)
2642 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2643 : check_annul_list_true_false (1, delay_list)
2644 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2646 rtx temp;
2648 must_annul = 1;
2649 winner:
2651 #ifdef HAVE_cc0
2652 if (reg_mentioned_p (cc0_rtx, pat))
2653 link_cc0_insns (trial);
2654 #endif
2656 /* If we own this thread, delete the insn. If this is the
2657 destination of a branch, show that a basic block status
2658 may have been updated. In any case, mark the new
2659 starting point of this thread. */
2660 if (own_thread)
2662 update_block (trial, thread);
2663 if (trial == thread)
2665 thread = next_active_insn (thread);
2666 if (new_thread == trial)
2667 new_thread = thread;
2669 delete_insn (trial);
2671 else
2672 new_thread = next_active_insn (trial);
2674 temp = own_thread ? trial : copy_rtx (trial);
2675 if (thread_if_true)
2676 INSN_FROM_TARGET_P (temp) = 1;
2678 delay_list = add_to_delay_list (temp, delay_list);
2680 if (slots_to_fill == ++(*pslots_filled))
2682 /* Even though we have filled all the slots, we
2683 may be branching to a location that has a
2684 redundant insn. Skip any if so. */
2685 while (new_thread && ! own_thread
2686 && ! insn_sets_resource_p (new_thread, &set, 1)
2687 && ! insn_sets_resource_p (new_thread, &needed, 1)
2688 && ! insn_references_resource_p (new_thread,
2689 &set, 1)
2690 && (prior_insn
2691 = redundant_insn (new_thread, insn,
2692 delay_list)))
2694 /* We know we do not own the thread, so no need
2695 to call update_block and delete_insn. */
2696 fix_reg_dead_note (prior_insn, insn);
2697 update_reg_unused_notes (prior_insn, new_thread);
2698 new_thread = next_active_insn (new_thread);
2700 break;
2703 continue;
2708 /* This insn can't go into a delay slot. */
2709 lose = 1;
2710 mark_set_resources (trial, &set, 0, 1);
2711 mark_referenced_resources (trial, &needed, 1);
2713 /* Ensure we don't put insns between the setting of cc and the comparison
2714 by moving a setting of cc into an earlier delay slot since these insns
2715 could clobber the condition code. */
2716 set.cc = 1;
2718 /* If this insn is a register-register copy and the next insn has
2719 a use of our destination, change it to use our source. That way,
2720 it will become a candidate for our delay slot the next time
2721 through this loop. This case occurs commonly in loops that
2722 scan a list.
2724 We could check for more complex cases than those tested below,
2725 but it doesn't seem worth it. It might also be a good idea to try
2726 to swap the two insns. That might do better.
2728 We can't do this if the next insn modifies our destination, because
2729 that would make the replacement into the insn invalid. We also can't
2730 do this if it modifies our source, because it might be an earlyclobber
2731 operand. This latter test also prevents updating the contents of
2732 a PRE_INC. */
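      /* An illustrative sketch with made-up register numbers: given

	     (set (reg 2) (reg 1))
	     (set (reg 3) (plus (reg 2) (const_int 4)))

	 the second insn is rewritten to add from (reg 1) instead, so it
	 no longer depends on the copy's destination and may itself become
	 a delay slot candidate on a later pass.  */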
2734 if (GET_CODE (trial) == INSN && GET_CODE (pat) == SET
2735 && GET_CODE (SET_SRC (pat)) == REG
2736 && GET_CODE (SET_DEST (pat)) == REG)
2738 rtx next = next_nonnote_insn (trial);
2740 if (next && GET_CODE (next) == INSN
2741 && GET_CODE (PATTERN (next)) != USE
2742 && ! reg_set_p (SET_DEST (pat), next)
2743 && ! reg_set_p (SET_SRC (pat), next)
2744 && reg_referenced_p (SET_DEST (pat), PATTERN (next)))
2745 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2749 /* If we stopped on a branch insn that has delay slots, see if we can
2750 steal some of the insns in those slots. */
2751 if (trial && GET_CODE (trial) == INSN
2752 && GET_CODE (PATTERN (trial)) == SEQUENCE
2753 && GET_CODE (XVECEXP (PATTERN (trial), 0, 0)) == JUMP_INSN)
2755 /* If this is the `true' thread, we will want to follow the jump,
2756 so we can only do this if we have taken everything up to here. */
2757 if (thread_if_true && trial == new_thread)
2758 delay_list
2759 = steal_delay_list_from_target (insn, condition, PATTERN (trial),
2760 delay_list, &set, &needed,
2761 &opposite_needed, slots_to_fill,
2762 pslots_filled, &must_annul,
2763 &new_thread);
2764 else if (! thread_if_true)
2765 delay_list
2766 = steal_delay_list_from_fallthrough (insn, condition,
2767 PATTERN (trial),
2768 delay_list, &set, &needed,
2769 &opposite_needed, slots_to_fill,
2770 pslots_filled, &must_annul);
2773 /* If we haven't found anything for this delay slot and it is very
2774 likely that the branch will be taken, see if the insn at our target
2775 increments or decrements a register with an increment that does not
2776 depend on the destination register. If so, try to place the opposite
2777 arithmetic insn after the jump insn and put the arithmetic insn in the
2778 delay slot. If we can't do this, return. */
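  /* An illustrative sketch with made-up operands: if the insn at the
     branch target is

	 (set (reg 4) (plus (reg 4) (const_int 8)))

     we emit (set (reg 4) (plus (reg 4) (const_int -8))) just after the
     branch and put the original increment in the delay slot, which pays
     off when the branch is almost always taken.  */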
2779 if (delay_list == 0 && likely && new_thread
2780 && GET_CODE (new_thread) == INSN
2781 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2782 && asm_noperands (PATTERN (new_thread)) < 0)
2784 rtx pat = PATTERN (new_thread);
2785 rtx dest;
2786 rtx src;
2788 trial = new_thread;
2789 pat = PATTERN (trial);
2791 if (GET_CODE (trial) != INSN || GET_CODE (pat) != SET
2792 || ! eligible_for_delay (insn, 0, trial, flags))
2793 return 0;
2795 dest = SET_DEST (pat), src = SET_SRC (pat);
2796 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2797 && rtx_equal_p (XEXP (src, 0), dest)
2798 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1)))
2800 rtx other = XEXP (src, 1);
2801 rtx new_arith;
2802 rtx ninsn;
2804 /* If this is a constant adjustment, use the same code with
2805 the negated constant. Otherwise, reverse the sense of the
2806 arithmetic. */
2807 if (GET_CODE (other) == CONST_INT)
2808 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2809 negate_rtx (GET_MODE (src), other));
2810 else
2811 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2812 GET_MODE (src), dest, other);
2814 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2815 insn);
2817 if (recog_memoized (ninsn) < 0
2818 || (extract_insn (ninsn), ! constrain_operands (1)))
2820 delete_insn (ninsn);
2821 return 0;
2824 if (own_thread)
2826 update_block (trial, thread);
2827 if (trial == thread)
2829 thread = next_active_insn (thread);
2830 if (new_thread == trial)
2831 new_thread = thread;
2833 delete_insn (trial);
2835 else
2836 new_thread = next_active_insn (trial);
2838 ninsn = own_thread ? trial : copy_rtx (trial);
2839 if (thread_if_true)
2840 INSN_FROM_TARGET_P (ninsn) = 1;
2842 delay_list = add_to_delay_list (ninsn, NULL_RTX);
2843 (*pslots_filled)++;
2847 if (delay_list && must_annul)
2848 INSN_ANNULLED_BRANCH_P (insn) = 1;
2850 /* If we are to branch into the middle of this thread, find an appropriate
2851 label or make a new one if none, and redirect INSN to it. If we hit the
2852 end of the function, use the end-of-function label. */
2853 if (new_thread != thread)
2855 rtx label;
2857 if (! thread_if_true)
2858 abort ();
2860 if (new_thread && GET_CODE (new_thread) == JUMP_INSN
2861 && (simplejump_p (new_thread)
2862 || GET_CODE (PATTERN (new_thread)) == RETURN)
2863 && redirect_with_delay_list_safe_p (insn,
2864 JUMP_LABEL (new_thread),
2865 delay_list))
2866 new_thread = follow_jumps (JUMP_LABEL (new_thread));
2868 if (new_thread == 0)
2869 label = find_end_label ();
2870 else if (GET_CODE (new_thread) == CODE_LABEL)
2871 label = new_thread;
2872 else
2873 label = get_label_before (new_thread);
2875 reorg_redirect_jump (insn, label);
2878 return delay_list;
2881 /* Make another attempt to find insns to place in delay slots.
2883 We previously looked for insns located in front of the delay insn
2884 and, for non-jump delay insns, located behind the delay insn.
2886 Here only try to schedule jump insns and try to move insns from either
2887 the target or the following insns into the delay slot. If annulling is
2888 supported, we will be likely to do this. Otherwise, we can do this only
2889 if safe. */
2891 static void
2892 fill_eager_delay_slots ()
2894 register rtx insn;
2895 register int i;
2896 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2898 for (i = 0; i < num_unfilled_slots; i++)
2900 rtx condition;
2901 rtx target_label, insn_at_target, fallthrough_insn;
2902 rtx delay_list = 0;
2903 int own_target;
2904 int own_fallthrough;
2905 int prediction, slots_to_fill, slots_filled;
2907 insn = unfilled_slots_base[i];
2908 if (insn == 0
2909 || INSN_DELETED_P (insn)
2910 || GET_CODE (insn) != JUMP_INSN
2911 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2912 continue;
2914 slots_to_fill = num_delay_slots (insn);
2915       /* Some machine descriptions have defined instructions to have
2916 delay slots only in certain circumstances which may depend on
2917 nearby insns (which change due to reorg's actions).
2919 For example, the PA port normally has delay slots for unconditional
2920 jumps.
2922 However, the PA port claims such jumps do not have a delay slot
2923 if they are immediate successors of certain CALL_INSNs. This
2924 allows the port to favor filling the delay slot of the call with
2925 the unconditional jump. */
2926 if (slots_to_fill == 0)
2927 continue;
2929 slots_filled = 0;
2930 target_label = JUMP_LABEL (insn);
2931 condition = get_branch_condition (insn, target_label);
2933 if (condition == 0)
2934 continue;
2936 /* Get the next active fallthrough and target insns and see if we own
2937 them. Then see whether the branch is likely true. We don't need
2938 to do a lot of this for unconditional branches. */
2940 insn_at_target = next_active_insn (target_label);
2941 own_target = own_thread_p (target_label, target_label, 0);
2943 if (condition == const_true_rtx)
2945 own_fallthrough = 0;
2946 fallthrough_insn = 0;
2947 prediction = 2;
2949 else
2951 fallthrough_insn = next_active_insn (insn);
2952 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2953 prediction = mostly_true_jump (insn, condition);
2956 /* If this insn is expected to branch, first try to get insns from our
2957 	 target, then our fallthrough insns. If it is not expected to branch,
2958 try the other order. */
2960 if (prediction > 0)
2962 delay_list
2963 = fill_slots_from_thread (insn, condition, insn_at_target,
2964 fallthrough_insn, prediction == 2, 1,
2965 own_target,
2966 slots_to_fill, &slots_filled, delay_list);
2968 if (delay_list == 0 && own_fallthrough)
2970 /* Even though we didn't find anything for delay slots,
2971 we might have found a redundant insn which we deleted
2972 from the thread that was filled. So we have to recompute
2973 the next insn at the target. */
2974 target_label = JUMP_LABEL (insn);
2975 insn_at_target = next_active_insn (target_label);
2977 delay_list
2978 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2979 insn_at_target, 0, 0,
2980 own_fallthrough,
2981 slots_to_fill, &slots_filled,
2982 delay_list);
2985 else
2987 if (own_fallthrough)
2988 delay_list
2989 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2990 insn_at_target, 0, 0,
2991 own_fallthrough,
2992 slots_to_fill, &slots_filled,
2993 delay_list);
2995 if (delay_list == 0)
2996 delay_list
2997 = fill_slots_from_thread (insn, condition, insn_at_target,
2998 next_active_insn (insn), 0, 1,
2999 own_target,
3000 slots_to_fill, &slots_filled,
3001 delay_list);
3004 if (delay_list)
3005 unfilled_slots_base[i]
3006 = emit_delay_sequence (insn, delay_list, slots_filled);
3008 if (slots_to_fill == slots_filled)
3009 unfilled_slots_base[i] = 0;
3011 note_delay_statistics (slots_filled, 1);
3015 /* Once we have tried two ways to fill a delay slot, make a pass over the
3016 code to try to improve the results and to do such things as more jump
3017 threading. */
3019 static void
3020 relax_delay_slots (first)
3021 rtx first;
3023 register rtx insn, next, pat;
3024 register rtx trial, delay_insn, target_label;
3026 /* Look at every JUMP_INSN and see if we can improve it. */
3027 for (insn = first; insn; insn = next)
3029 rtx other;
3031 next = next_active_insn (insn);
3033 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3034 the next insn, or jumps to a label that is not the last of a
3035 group of consecutive labels. */
3036 if (GET_CODE (insn) == JUMP_INSN
3037 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3038 && (target_label = JUMP_LABEL (insn)) != 0)
3040 target_label = follow_jumps (target_label);
3041 target_label = prev_label (next_active_insn (target_label));
3043 if (target_label == 0)
3044 target_label = find_end_label ();
3046 if (next_active_insn (target_label) == next
3047 && ! condjump_in_parallel_p (insn))
3049 delete_jump (insn);
3050 continue;
3053 if (target_label != JUMP_LABEL (insn))
3054 reorg_redirect_jump (insn, target_label);
3056 	  /* See if this jump branches around an unconditional jump.
3057 If so, invert this jump and point it to the target of the
3058 second jump. */
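	  /* An illustrative sketch in pseudo-assembly: the pair

		 beq L1
		 b   L2
	     L1: ...

	     becomes "bne L2" followed directly by L1, i.e. the conditional
	     jump is inverted and retargeted at L2 and the unconditional
	     jump is deleted.  */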
3059 if (next && GET_CODE (next) == JUMP_INSN
3060 && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN)
3061 && next_active_insn (target_label) == next_active_insn (next)
3062 && no_labels_between_p (insn, next))
3064 rtx label = JUMP_LABEL (next);
3066 /* Be careful how we do this to avoid deleting code or
3067 labels that are momentarily dead. See similar optimization
3068 in jump.c.
3070 We also need to ensure we properly handle the case when
3071 invert_jump fails. */
3073 ++LABEL_NUSES (target_label);
3074 if (label)
3075 ++LABEL_NUSES (label);
3077 if (invert_jump (insn, label))
3079 delete_insn (next);
3080 next = insn;
3083 if (label)
3084 --LABEL_NUSES (label);
3086 if (--LABEL_NUSES (target_label) == 0)
3087 delete_insn (target_label);
3089 continue;
3093 /* If this is an unconditional jump and the previous insn is a
3094 conditional jump, try reversing the condition of the previous
3095 insn and swapping our targets. The next pass might be able to
3096 fill the slots.
3098 Don't do this if we expect the conditional branch to be true, because
3099 we would then be making the more common case longer. */
3101 if (GET_CODE (insn) == JUMP_INSN
3102 && (simplejump_p (insn) || GET_CODE (PATTERN (insn)) == RETURN)
3103 && (other = prev_active_insn (insn)) != 0
3104 && (condjump_p (other) || condjump_in_parallel_p (other))
3105 && no_labels_between_p (other, insn)
3106 && 0 > mostly_true_jump (other,
3107 get_branch_condition (other,
3108 JUMP_LABEL (other))))
3110 rtx other_target = JUMP_LABEL (other);
3111 target_label = JUMP_LABEL (insn);
3113 /* Increment the count of OTHER_TARGET, so it doesn't get deleted
3114 as we move the label. */
3115 if (other_target)
3116 ++LABEL_NUSES (other_target);
3118 if (invert_jump (other, target_label))
3119 reorg_redirect_jump (insn, other_target);
3121 if (other_target)
3122 --LABEL_NUSES (other_target);
3125 /* Now look only at cases where we have filled a delay slot. */
3126 if (GET_CODE (insn) != INSN
3127 || GET_CODE (PATTERN (insn)) != SEQUENCE)
3128 continue;
3130 pat = PATTERN (insn);
3131 delay_insn = XVECEXP (pat, 0, 0);
3133 /* See if the first insn in the delay slot is redundant with some
3134 previous insn. Remove it from the delay slot if so; then set up
3135 to reprocess this insn. */
3136 if (redundant_insn (XVECEXP (pat, 0, 1), delay_insn, 0))
3138 delete_from_delay_slot (XVECEXP (pat, 0, 1));
3139 next = prev_active_insn (next);
3140 continue;
3143 /* See if we have a RETURN insn with a filled delay slot followed
3144 	 by a RETURN insn with an unfilled delay slot. If so, we can delete
3145 	 the first RETURN (but not its delay insn). This gives the same
3146 effect in fewer instructions.
3148 Only do so if optimizing for size since this results in slower, but
3149 smaller code. */
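      /* An illustrative sketch: given a filled return such as
	 (sequence [return, insn A]) followed immediately by a bare RETURN,
	 the first RETURN is deleted and insn A is re-emitted as an ordinary
	 insn that simply falls through into the remaining RETURN.  */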
3150 if (optimize_size
3151 && GET_CODE (PATTERN (delay_insn)) == RETURN
3152 && next
3153 && GET_CODE (next) == JUMP_INSN
3154 && GET_CODE (PATTERN (next)) == RETURN)
3156 int i;
3158 /* Delete the RETURN and just execute the delay list insns.
3160 We do this by deleting the INSN containing the SEQUENCE, then
3161 re-emitting the insns separately, and then deleting the RETURN.
3162 This allows the count of the jump target to be properly
3163 decremented. */
3165 /* Clear the from target bit, since these insns are no longer
3166 in delay slots. */
3167 for (i = 0; i < XVECLEN (pat, 0); i++)
3168 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3170 trial = PREV_INSN (insn);
3171 delete_insn (insn);
3172 emit_insn_after (pat, trial);
3173 delete_scheduled_jump (delay_insn);
3174 continue;
3177 /* Now look only at the cases where we have a filled JUMP_INSN. */
3178 if (GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) != JUMP_INSN
3179 || ! (condjump_p (XVECEXP (PATTERN (insn), 0, 0))
3180 || condjump_in_parallel_p (XVECEXP (PATTERN (insn), 0, 0))))
3181 continue;
3183 target_label = JUMP_LABEL (delay_insn);
3185 if (target_label)
3187 /* If this jump goes to another unconditional jump, thread it, but
3188 don't convert a jump into a RETURN here. */
3189 trial = follow_jumps (target_label);
3190 /* We use next_real_insn instead of next_active_insn, so that
3191 the special USE insns emitted by reorg won't be ignored.
3192 If they are ignored, then they will get deleted if target_label
3193 is now unreachable, and that would cause mark_target_live_regs
3194 to fail. */
3195 trial = prev_label (next_real_insn (trial));
3196 if (trial == 0 && target_label != 0)
3197 trial = find_end_label ();
3199 if (trial != target_label
3200 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3202 reorg_redirect_jump (delay_insn, trial);
3203 target_label = trial;
3206 /* If the first insn at TARGET_LABEL is redundant with a previous
3207 	 insn, redirect the jump to the following insn and process again. */
3208 trial = next_active_insn (target_label);
3209 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3210 && redundant_insn (trial, insn, 0))
3212 rtx tmp;
3214 /* Figure out where to emit the special USE insn so we don't
3215 later incorrectly compute register live/death info. */
3216 tmp = next_active_insn (trial);
3217 if (tmp == 0)
3218 tmp = find_end_label ();
3220 /* Insert the special USE insn and update dataflow info. */
3221 update_block (trial, tmp);
3223 /* Now emit a label before the special USE insn, and
3224 redirect our jump to the new label. */
3225 target_label = get_label_before (PREV_INSN (tmp));
3226 reorg_redirect_jump (delay_insn, target_label);
3227 next = insn;
3228 continue;
3231 /* Similarly, if it is an unconditional jump with one insn in its
3232 delay list and that insn is redundant, thread the jump. */
3233 if (trial && GET_CODE (PATTERN (trial)) == SEQUENCE
3234 && XVECLEN (PATTERN (trial), 0) == 2
3235 && GET_CODE (XVECEXP (PATTERN (trial), 0, 0)) == JUMP_INSN
3236 && (simplejump_p (XVECEXP (PATTERN (trial), 0, 0))
3237 || GET_CODE (PATTERN (XVECEXP (PATTERN (trial), 0, 0))) == RETURN)
3238 && redundant_insn (XVECEXP (PATTERN (trial), 0, 1), insn, 0))
3240 target_label = JUMP_LABEL (XVECEXP (PATTERN (trial), 0, 0));
3241 if (target_label == 0)
3242 target_label = find_end_label ();
3244 if (redirect_with_delay_slots_safe_p (delay_insn, target_label,
3245 insn))
3247 reorg_redirect_jump (delay_insn, target_label);
3248 next = insn;
3249 continue;
3254 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3255 && prev_active_insn (target_label) == insn
3256 && ! condjump_in_parallel_p (delay_insn)
3257 #ifdef HAVE_cc0
3258 /* If the last insn in the delay slot sets CC0 for some insn,
3259 various code assumes that it is in a delay slot. We could
3260 put it back where it belonged and delete the register notes,
3261 but it doesn't seem worthwhile in this uncommon case. */
3262 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3263 REG_CC_USER, NULL_RTX)
3264 #endif
3267 int i;
3269 /* All this insn does is execute its delay list and jump to the
3270 following insn. So delete the jump and just execute the delay
3271 list insns.
3273 We do this by deleting the INSN containing the SEQUENCE, then
3274 re-emitting the insns separately, and then deleting the jump.
3275 This allows the count of the jump target to be properly
3276 decremented. */
3278 /* Clear the from target bit, since these insns are no longer
3279 in delay slots. */
3280 for (i = 0; i < XVECLEN (pat, 0); i++)
3281 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3283 trial = PREV_INSN (insn);
3284 delete_insn (insn);
3285 emit_insn_after (pat, trial);
3286 delete_scheduled_jump (delay_insn);
3287 continue;
3290 /* See if this is an unconditional jump around a single insn which is
3291 identical to the one in its delay slot. In this case, we can just
3292 delete the branch and the insn in its delay slot. */
3293 if (next && GET_CODE (next) == INSN
3294 && prev_label (next_active_insn (next)) == target_label
3295 && simplejump_p (insn)
3296 && XVECLEN (pat, 0) == 2
3297 && rtx_equal_p (PATTERN (next), PATTERN (XVECEXP (pat, 0, 1))))
3299 delete_insn (insn);
3300 continue;
3303 /* See if this jump (with its delay slots) branches around another
3304 jump (without delay slots). If so, invert this jump and point
3305 it to the target of the second jump. We cannot do this for
3306 annulled jumps, though. Again, don't convert a jump to a RETURN
3307 here. */
3308 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3309 && next && GET_CODE (next) == JUMP_INSN
3310 && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN)
3311 && next_active_insn (target_label) == next_active_insn (next)
3312 && no_labels_between_p (insn, next))
3314 rtx label = JUMP_LABEL (next);
3315 rtx old_label = JUMP_LABEL (delay_insn);
3317 if (label == 0)
3318 label = find_end_label ();
3320 if (redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3322 /* Be careful how we do this to avoid deleting code or labels
3323 that are momentarily dead. See similar optimization in
3324 jump.c */
3325 if (old_label)
3326 ++LABEL_NUSES (old_label);
3328 if (invert_jump (delay_insn, label))
3330 int i;
3332 /* Must update the INSN_FROM_TARGET_P bits now that
3333 the branch is reversed, so that mark_target_live_regs
3334 will handle the delay slot insn correctly. */
3335 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3337 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3338 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3341 delete_insn (next);
3342 next = insn;
3345 if (old_label && --LABEL_NUSES (old_label) == 0)
3346 delete_insn (old_label);
3347 continue;
3351 /* If we own the thread opposite the way this insn branches, see if we
3352 can merge its delay slots with following insns. */
3353 if (INSN_FROM_TARGET_P (XVECEXP (pat, 0, 1))
3354 && own_thread_p (NEXT_INSN (insn), 0, 1))
3355 try_merge_delay_insns (insn, next);
3356 else if (! INSN_FROM_TARGET_P (XVECEXP (pat, 0, 1))
3357 && own_thread_p (target_label, target_label, 0))
3358 try_merge_delay_insns (insn, next_active_insn (target_label));
3360 /* If we get here, we haven't deleted INSN. But we may have deleted
3361 NEXT, so recompute it. */
3362 next = next_active_insn (insn);
3366 #ifdef HAVE_return
3368 /* Look for filled jumps to the end of function label. We can try to convert
3369 them into RETURN insns if the insns in the delay slot are valid for the
3370 RETURN as well. */
3372 static void
3373 make_return_insns (first)
3374 rtx first;
3376 rtx insn, jump_insn, pat;
3377 rtx real_return_label = end_of_function_label;
3378 int slots, i;
3380 /* See if there is a RETURN insn in the function other than the one we
3381 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3382 into a RETURN to jump to it. */
3383 for (insn = first; insn; insn = NEXT_INSN (insn))
3384 if (GET_CODE (insn) == JUMP_INSN && GET_CODE (PATTERN (insn)) == RETURN)
3386 real_return_label = get_label_before (insn);
3387 break;
3390 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3391 was equal to END_OF_FUNCTION_LABEL. */
3392 LABEL_NUSES (real_return_label)++;
3394 /* Clear the list of insns to fill so we can use it. */
3395 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3397 for (insn = first; insn; insn = NEXT_INSN (insn))
3399 int flags;
3401 /* Only look at filled JUMP_INSNs that go to the end of function
3402 label. */
3403 if (GET_CODE (insn) != INSN
3404 || GET_CODE (PATTERN (insn)) != SEQUENCE
3405 || GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) != JUMP_INSN
3406 || JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) != end_of_function_label)
3407 continue;
3409 pat = PATTERN (insn);
3410 jump_insn = XVECEXP (pat, 0, 0);
3412 /* If we can't make the jump into a RETURN, try to redirect it to the best
3413 RETURN and go on to the next insn. */
3414 if (! reorg_redirect_jump (jump_insn, NULL_RTX))
3416 /* Make sure redirecting the jump will not invalidate the delay
3417 slot insns. */
3418 if (redirect_with_delay_slots_safe_p (jump_insn,
3419 real_return_label,
3420 insn))
3421 reorg_redirect_jump (jump_insn, real_return_label);
3422 continue;
3425       /* See if this RETURN can accept the insns currently in its delay slot.
3426 It can if it has more or an equal number of slots and the contents
3427 of each is valid. */
3429 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3430 slots = num_delay_slots (jump_insn);
3431 if (slots >= XVECLEN (pat, 0) - 1)
3433 for (i = 1; i < XVECLEN (pat, 0); i++)
3434 if (! (
3435 #ifdef ANNUL_IFFALSE_SLOTS
3436 (INSN_ANNULLED_BRANCH_P (jump_insn)
3437 && INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
3438 ? eligible_for_annul_false (jump_insn, i - 1,
3439 XVECEXP (pat, 0, i), flags) :
3440 #endif
3441 #ifdef ANNUL_IFTRUE_SLOTS
3442 (INSN_ANNULLED_BRANCH_P (jump_insn)
3443 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
3444 ? eligible_for_annul_true (jump_insn, i - 1,
3445 XVECEXP (pat, 0, i), flags) :
3446 #endif
3447 	       eligible_for_delay (jump_insn, i - 1, XVECEXP (pat, 0, i), flags)))
3448 break;
3450 else
3451 i = 0;
3453 if (i == XVECLEN (pat, 0))
3454 continue;
3456 /* We have to do something with this insn. If it is an unconditional
3457 RETURN, delete the SEQUENCE and output the individual insns,
3458 followed by the RETURN. Then set things up so we try to find
3459 insns for its delay slots, if it needs some. */
3460 if (GET_CODE (PATTERN (jump_insn)) == RETURN)
3462 rtx prev = PREV_INSN (insn);
3464 delete_insn (insn);
3465 for (i = 1; i < XVECLEN (pat, 0); i++)
3466 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3468 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3469 emit_barrier_after (insn);
3471 if (slots)
3472 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3474 else
3475 /* It is probably more efficient to keep this with its current
3476 delay slot as a branch to a RETURN. */
3477 reorg_redirect_jump (jump_insn, real_return_label);
3480 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3481 new delay slots we have created. */
3482 if (--LABEL_NUSES (real_return_label) == 0)
3483 delete_insn (real_return_label);
3485 fill_simple_delay_slots (1);
3486 fill_simple_delay_slots (0);
3488 #endif
3490 /* Try to find insns to place in delay slots. */
3492 void
3493 dbr_schedule (first, file)
3494 rtx first;
3495 FILE *file;
3497 rtx insn, next, epilogue_insn = 0;
3498 int i;
3499 #if 0
3500 int old_flag_no_peephole = flag_no_peephole;
3502 /* Execute `final' once in prescan mode to delete any insns that won't be
3503 used. Don't let final try to do any peephole optimization--it will
3504 ruin dataflow information for this pass. */
3506 flag_no_peephole = 1;
3507 final (first, 0, NO_DEBUG, 1, 1);
3508 flag_no_peephole = old_flag_no_peephole;
3509 #endif
3511 /* If the current function has no insns other than the prologue and
3512 epilogue, then do not try to fill any delay slots. */
3513 if (n_basic_blocks == 0)
3514 return;

  /* Find the highest INSN_UID and allocate and initialize our map from
     INSN_UID's to position in code.  */
  for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_UID (insn) > max_uid)
        max_uid = INSN_UID (insn);
      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_EPILOGUE_BEG)
        epilogue_insn = insn;
    }
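
  /* EPILOGUE_INSN, when found, marks where the epilogue begins;
     init_resource_info uses it below when working out what must be live at
     the end of the function.  */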

  uid_to_ruid = (int *) alloca ((max_uid + 1) * sizeof (int));
  for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
    uid_to_ruid[INSN_UID (insn)] = i;
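
  /* uid_to_ruid records each insn's ordinal position in the stream, which
     lets later code compare the relative order of two insns cheaply.  */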

  /* Initialize the list of insns that need filling.  */
  if (unfilled_firstobj == 0)
    {
      gcc_obstack_init (&unfilled_slots_obstack);
      unfilled_firstobj = (rtx *) obstack_alloc (&unfilled_slots_obstack, 0);
    }
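
  /* Insns still needing delay slots are kept as a growing vector of rtx
     pointers on that obstack; UNFILLED_FIRSTOBJ marks the vector's start.  */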

  for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
    {
      rtx target;

      INSN_ANNULLED_BRANCH_P (insn) = 0;
      INSN_FROM_TARGET_P (insn) = 0;

      /* Skip vector tables.  We can't get attributes for them.  */
      if (GET_CODE (insn) == JUMP_INSN
          && (GET_CODE (PATTERN (insn)) == ADDR_VEC
              || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
        continue;

      if (num_delay_slots (insn) > 0)
        obstack_ptr_grow (&unfilled_slots_obstack, insn);

      /* Ensure all jumps go to the last of a set of consecutive labels.  */
      if (GET_CODE (insn) == JUMP_INSN
          && (condjump_p (insn) || condjump_in_parallel_p (insn))
          && JUMP_LABEL (insn) != 0
          && ((target = prev_label (next_active_insn (JUMP_LABEL (insn))))
              != JUMP_LABEL (insn)))
        redirect_jump (insn, target);
    }

  init_resource_info (epilogue_insn);

  /* Show we haven't computed an end-of-function label yet.  */
  end_of_function_label = 0;

  /* Initialize the statistics for this function.  */
  bzero ((char *) num_insns_needing_delays, sizeof num_insns_needing_delays);
  bzero ((char *) num_filled_delays, sizeof num_filled_delays);

  /* Now do the delay slot filling.  Try everything twice in case earlier
     changes make more slots fillable.  */

  for (reorg_pass_number = 0;
       reorg_pass_number < MAX_REORG_PASSES;
       reorg_pass_number++)
    {
      fill_simple_delay_slots (1);
      fill_simple_delay_slots (0);
      fill_eager_delay_slots ();
      relax_delay_slots (first);
    }

  /* Delete any USE insns made by update_block; subsequent passes don't need
     them and don't know how to deal with them.  */
  for (insn = first; insn; insn = next)
    {
      next = NEXT_INSN (insn);

      if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == USE
          && GET_RTX_CLASS (GET_CODE (XEXP (PATTERN (insn), 0))) == 'i')
        next = delete_insn (insn);
    }

  /* If we made an end-of-function label, indicate that it is now
     safe to delete it by undoing our prior adjustment to LABEL_NUSES.
     If it is now unused, delete it.  */
  if (end_of_function_label && --LABEL_NUSES (end_of_function_label) == 0)
    delete_insn (end_of_function_label);

#ifdef HAVE_return
  if (HAVE_return && end_of_function_label != 0)
    make_return_insns (first);
#endif

  obstack_free (&unfilled_slots_obstack, unfilled_firstobj);

  /* It is not clear why the line below is needed, but it does seem to be.  */
  unfilled_firstobj = (rtx *) obstack_alloc (&unfilled_slots_obstack, 0);

  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  reposition_prologue_and_epilogue_notes (first);

  if (file)
    {
      register int i, j, need_comma;

      for (reorg_pass_number = 0;
           reorg_pass_number < MAX_REORG_PASSES;
           reorg_pass_number++)
        {
          fprintf (file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
          for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
            {
              need_comma = 0;
              fprintf (file, ";; Reorg function #%d\n", i);

              fprintf (file, ";; %d insns needing delay slots\n;; ",
                       num_insns_needing_delays[i][reorg_pass_number]);

              for (j = 0; j < MAX_DELAY_HISTOGRAM; j++)
                if (num_filled_delays[i][j][reorg_pass_number])
                  {
                    if (need_comma)
                      fprintf (file, ", ");
                    need_comma = 1;
                    fprintf (file, "%d got %d delays",
                             num_filled_delays[i][j][reorg_pass_number], j);
                  }
              fprintf (file, "\n");
            }
        }
    }

  /* For all JUMP insns, fill in branch prediction notes, so that during
     assembler output a target can set branch prediction bits in the code.
     We have to do this now, because up to this point the destinations of
     jumps can still be moved around and changed, but past this point they
     cannot.  */
  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      int pred_flags;
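
      /* A branch whose delay slots have been filled is wrapped in a
         SEQUENCE; the jump itself is element 0 of that vector.  */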
      if (GET_CODE (insn) == INSN)
        {
          rtx pat = PATTERN (insn);

          if (GET_CODE (pat) == SEQUENCE)
            insn = XVECEXP (pat, 0, 0);
        }
      if (GET_CODE (insn) != JUMP_INSN)
        continue;

      pred_flags = get_jump_flags (insn, JUMP_LABEL (insn));
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_BR_PRED,
                                            GEN_INT (pred_flags),
                                            REG_NOTES (insn));
    }
  free_resource_info ();
}
#endif /* DELAY_SLOTS */