gcc/reorg.c (official-gcc.git)
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
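/* As a generic illustration (an assembly-level sketch, not the output of any
   particular port): with a single MIPS-style delay slot, an insn from before
   the branch that the branch does not depend on can be moved into the slot,

	unfilled slot                    filled slot
	lw    $4, 0($5)                  beqz  $6, L1
	beqz  $6, L1                     lw    $4, 0($5)    ; delay slot
	nop                              ...

   so the load executes while the branch resolves and no nop is needed.  */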
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "df.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "regs.h"
116 #include "emit-rtl.h"
117 #include "recog.h"
118 #include "diagnostic-core.h"
119 #include "flags.h"
120 #include "alias.h"
121 #include "dojump.h"
122 #include "explow.h"
123 #include "calls.h"
124 #include "varasm.h"
125 #include "stmt.h"
126 #include "expr.h"
127 #include "conditions.h"
128 #include "insn-attr.h"
129 #include "resource.h"
130 #include "except.h"
131 #include "params.h"
132 #include "tree-pass.h"
135 /* First, some functions that were used before GCC got a control flow graph.
136 These functions are now only used here in reorg.c, and have therefore
137 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
139 /* Return the last label to mark the same position as LABEL. Return LABEL
140 itself if it is null or any return rtx. */
142 static rtx
143 skip_consecutive_labels (rtx label_or_return)
145 rtx_insn *insn;
147 if (label_or_return && ANY_RETURN_P (label_or_return))
148 return label_or_return;
150 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
152 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
153 if (LABEL_P (insn))
154 label = insn;
156 return label;
159 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
160 and REG_CC_USER notes so we can find it. */
162 static void
163 link_cc0_insns (rtx_insn *insn)
165 rtx user = next_nonnote_insn (insn);
167 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
168 user = XVECEXP (PATTERN (user), 0, 0);
170 add_reg_note (user, REG_CC_SETTER, insn);
171 add_reg_note (insn, REG_CC_USER, user);
174 /* Insns which have delay slots that have not yet been filled. */
176 static struct obstack unfilled_slots_obstack;
177 static rtx *unfilled_firstobj;
179 /* Define macros to refer to the first and last slot containing unfilled
180 insns. These are used because the list may move and its address
181 should be recomputed at each use. */
183 #define unfilled_slots_base \
184 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
186 #define unfilled_slots_next \
187 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
189 /* Points to the label before the end of the function, or before a
190 return insn. */
191 static rtx_code_label *function_return_label;
192 /* Likewise for a simple_return. */
193 static rtx_code_label *function_simple_return_label;
195 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
196 not always monotonically increase. */
197 static int *uid_to_ruid;
199 /* Highest valid index in `uid_to_ruid'. */
200 static int max_uid;
202 static int stop_search_p (rtx_insn *, int);
203 static int resource_conflicts_p (struct resources *, struct resources *);
204 static int insn_references_resource_p (rtx, struct resources *, bool);
205 static int insn_sets_resource_p (rtx, struct resources *, bool);
206 static rtx_code_label *find_end_label (rtx);
207 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
208 int);
209 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
210 static rtx_insn *delete_from_delay_slot (rtx_insn *);
211 static void delete_scheduled_jump (rtx_insn *);
212 static void note_delay_statistics (int, int);
213 static int get_jump_flags (const rtx_insn *, rtx);
214 static int mostly_true_jump (rtx);
215 static rtx get_branch_condition (const rtx_insn *, rtx);
216 static int condition_dominates_p (rtx, const rtx_insn *);
217 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
218 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
219 const vec<rtx_insn *> &);
220 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
221 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
222 vec<rtx_insn *> *,
223 struct resources *,
224 struct resources *,
225 struct resources *,
226 int, int *, int *,
227 rtx *);
228 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
229 vec<rtx_insn *> *,
230 struct resources *,
231 struct resources *,
232 struct resources *,
233 int, int *, int *);
234 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
235 static rtx redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
236 static int own_thread_p (rtx, rtx, int);
237 static void update_block (rtx_insn *, rtx);
238 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
239 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
240 static void fix_reg_dead_note (rtx, rtx);
241 static void update_reg_unused_notes (rtx, rtx);
242 static void fill_simple_delay_slots (int);
243 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
244 int, int, int, int,
245 int *, vec<rtx_insn *> *);
246 static void fill_eager_delay_slots (void);
247 static void relax_delay_slots (rtx_insn *);
248 static void make_return_insns (rtx_insn *);
250 /* A wrapper around next_active_insn which takes care to return ret_rtx
251 unchanged. */
253 static rtx
254 first_active_target_insn (rtx insn)
256 if (ANY_RETURN_P (insn))
257 return insn;
258 return next_active_insn (as_a <rtx_insn *> (insn));
261 /* Return true iff INSN is a simplejump, or any kind of return insn. */
263 static bool
264 simplejump_or_return_p (rtx insn)
266 return (JUMP_P (insn)
267 && (simplejump_p (as_a <rtx_insn *> (insn))
268 || ANY_RETURN_P (PATTERN (insn))));
 271 /* Return TRUE if this insn should stop the search for insns to fill delay
272 slots. LABELS_P indicates that labels should terminate the search.
273 In all cases, jumps terminate the search. */
275 static int
276 stop_search_p (rtx_insn *insn, int labels_p)
278 if (insn == 0)
279 return 1;
281 /* If the insn can throw an exception that is caught within the function,
282 it may effectively perform a jump from the viewpoint of the function.
283 Therefore act like for a jump. */
284 if (can_throw_internal (insn))
285 return 1;
287 switch (GET_CODE (insn))
289 case NOTE:
290 case CALL_INSN:
291 return 0;
293 case CODE_LABEL:
294 return labels_p;
296 case JUMP_INSN:
297 case BARRIER:
298 return 1;
300 case INSN:
301 /* OK unless it contains a delay slot or is an `asm' insn of some type.
302 We don't know anything about these. */
303 return (GET_CODE (PATTERN (insn)) == SEQUENCE
304 || GET_CODE (PATTERN (insn)) == ASM_INPUT
305 || asm_noperands (PATTERN (insn)) >= 0);
307 default:
308 gcc_unreachable ();
312 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
313 resource set contains a volatile memory reference. Otherwise, return FALSE. */
315 static int
316 resource_conflicts_p (struct resources *res1, struct resources *res2)
318 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
319 || res1->volatil || res2->volatil)
320 return 1;
322 return hard_reg_set_intersect_p (res1->regs, res2->regs);
325 /* Return TRUE if any resource marked in RES, a `struct resources', is
326 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
327 routine is using those resources.
329 We compute this by computing all the resources referenced by INSN and
330 seeing if this conflicts with RES. It might be faster to directly check
331 ourselves, and this is the way it used to work, but it means duplicating
332 a large block of complex code. */
334 static int
335 insn_references_resource_p (rtx insn, struct resources *res,
336 bool include_delayed_effects)
338 struct resources insn_res;
340 CLEAR_RESOURCE (&insn_res);
341 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
342 return resource_conflicts_p (&insn_res, res);
345 /* Return TRUE if INSN modifies resources that are marked in RES.
346 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
347 included. CC0 is only modified if it is explicitly set; see comments
348 in front of mark_set_resources for details. */
350 static int
351 insn_sets_resource_p (rtx insn, struct resources *res,
352 bool include_delayed_effects)
354 struct resources insn_sets;
356 CLEAR_RESOURCE (&insn_sets);
357 mark_set_resources (insn, &insn_sets, 0,
358 (include_delayed_effects
359 ? MARK_SRC_DEST_CALL
360 : MARK_SRC_DEST));
361 return resource_conflicts_p (&insn_sets, res);
364 /* Find a label at the end of the function or before a RETURN. If there
365 is none, try to make one. If that fails, returns 0.
367 The property of such a label is that it is placed just before the
368 epilogue or a bare RETURN insn, so that another bare RETURN can be
369 turned into a jump to the label unconditionally. In particular, the
370 label cannot be placed before a RETURN insn with a filled delay slot.
372 ??? There may be a problem with the current implementation. Suppose
373 we start with a bare RETURN insn and call find_end_label. It may set
374 function_return_label just before the RETURN. Suppose the machinery
375 is able to fill the delay slot of the RETURN insn afterwards. Then
376 function_return_label is no longer valid according to the property
377 described above and find_end_label will still return it unmodified.
378 Note that this is probably mitigated by the following observation:
379 once function_return_label is made, it is very likely the target of
380 a jump, so filling the delay slot of the RETURN will be much more
381 difficult.
382 KIND is either simple_return_rtx or ret_rtx, indicating which type of
383 return we're looking for. */
385 static rtx_code_label *
386 find_end_label (rtx kind)
388 rtx_insn *insn;
389 rtx_code_label **plabel;
391 if (kind == ret_rtx)
392 plabel = &function_return_label;
393 else
395 gcc_assert (kind == simple_return_rtx);
396 plabel = &function_simple_return_label;
399 /* If we found one previously, return it. */
400 if (*plabel)
401 return *plabel;
403 /* Otherwise, see if there is a label at the end of the function. If there
404 is, it must be that RETURN insns aren't needed, so that is our return
405 label and we don't have to do anything else. */
407 insn = get_last_insn ();
408 while (NOTE_P (insn)
409 || (NONJUMP_INSN_P (insn)
410 && (GET_CODE (PATTERN (insn)) == USE
411 || GET_CODE (PATTERN (insn)) == CLOBBER)))
412 insn = PREV_INSN (insn);
414 /* When a target threads its epilogue we might already have a
415 suitable return insn. If so put a label before it for the
416 function_return_label. */
417 if (BARRIER_P (insn)
418 && JUMP_P (PREV_INSN (insn))
419 && PATTERN (PREV_INSN (insn)) == kind)
421 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
422 rtx_code_label *label = gen_label_rtx ();
423 LABEL_NUSES (label) = 0;
425 /* Put the label before any USE insns that may precede the RETURN
426 insn. */
427 while (GET_CODE (temp) == USE)
428 temp = PREV_INSN (temp);
430 emit_label_after (label, temp);
431 *plabel = label;
434 else if (LABEL_P (insn))
435 *plabel = as_a <rtx_code_label *> (insn);
436 else
438 rtx_code_label *label = gen_label_rtx ();
439 LABEL_NUSES (label) = 0;
440 /* If the basic block reorder pass moves the return insn to
441 some other place try to locate it again and put our
442 function_return_label there. */
443 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
444 insn = PREV_INSN (insn);
445 if (insn)
447 insn = PREV_INSN (insn);
449 /* Put the label before any USE insns that may precede the
450 RETURN insn. */
451 while (GET_CODE (insn) == USE)
452 insn = PREV_INSN (insn);
454 emit_label_after (label, insn);
456 else
458 if (targetm.have_epilogue () && ! targetm.have_return ())
459 /* The RETURN insn has its delay slot filled so we cannot
460 emit the label just before it. Since we already have
461 an epilogue and cannot emit a new RETURN, we cannot
462 emit the label at all. */
463 return NULL;
465 /* Otherwise, make a new label and emit a RETURN and BARRIER,
466 if needed. */
467 emit_label (label);
468 if (targetm.have_return ())
470 /* The return we make may have delay slots too. */
471 rtx_insn *pat = targetm.gen_return ();
472 rtx_insn *insn = emit_jump_insn (pat);
473 set_return_jump_label (insn);
474 emit_barrier ();
475 if (num_delay_slots (insn) > 0)
476 obstack_ptr_grow (&unfilled_slots_obstack, insn);
479 *plabel = label;
482 /* Show one additional use for this label so it won't go away until
483 we are done. */
484 ++LABEL_NUSES (*plabel);
486 return *plabel;
489 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
490 the pattern of INSN with the SEQUENCE.
492 Returns the insn containing the SEQUENCE that replaces INSN. */
494 static rtx_insn *
495 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
497 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
498 rtvec seqv = rtvec_alloc (length + 1);
499 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
500 rtx_insn *seq_insn = make_insn_raw (seq);
502 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
503 not have a location, but one of the delayed insns does, we pick up a
504 location from there later. */
505 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
507 /* Unlink INSN from the insn chain, so that we can put it into
508 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
509 rtx_insn *after = PREV_INSN (insn);
510 remove_insn (insn);
511 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
513 /* Build our SEQUENCE and rebuild the insn chain. */
514 start_sequence ();
515 XVECEXP (seq, 0, 0) = emit_insn (insn);
517 unsigned int delay_insns = list.length ();
518 gcc_assert (delay_insns == (unsigned int) length);
519 for (unsigned int i = 0; i < delay_insns; i++)
521 rtx_insn *tem = list[i];
522 rtx note, next;
524 /* Show that this copy of the insn isn't deleted. */
525 tem->set_undeleted ();
527 /* Unlink insn from its original place, and re-emit it into
528 the sequence. */
529 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
530 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
 532 /* The SPARC assembler, for instance, emits a warning when debug info is output
 533 into the delay slot. */
534 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
535 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
536 INSN_LOCATION (tem) = 0;
538 for (note = REG_NOTES (tem); note; note = next)
540 next = XEXP (note, 1);
541 switch (REG_NOTE_KIND (note))
543 case REG_DEAD:
544 /* Remove any REG_DEAD notes because we can't rely on them now
545 that the insn has been moved. */
546 remove_note (tem, note);
547 break;
549 case REG_LABEL_OPERAND:
550 case REG_LABEL_TARGET:
551 /* Keep the label reference count up to date. */
552 if (LABEL_P (XEXP (note, 0)))
553 LABEL_NUSES (XEXP (note, 0)) ++;
554 break;
556 default:
557 break;
561 end_sequence ();
563 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
564 add_insn_after (seq_insn, after, NULL);
566 return seq_insn;
 569 /* Add INSN to DELAY_LIST. The list must
570 be in the order in which the insns are to be executed. */
572 static void
573 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
575 /* If INSN has its block number recorded, clear it since we may
576 be moving the insn to a new block. */
577 clear_hashed_info_for_insn (insn);
578 delay_list->safe_push (insn);
581 /* Delete INSN from the delay slot of the insn that it is in, which may
582 produce an insn with no delay slots. Return the new insn. */
584 static rtx_insn *
585 delete_from_delay_slot (rtx_insn *insn)
587 rtx_insn *trial, *seq_insn, *prev;
588 rtx_sequence *seq;
589 int i;
590 int had_barrier = 0;
592 /* We first must find the insn containing the SEQUENCE with INSN in its
593 delay slot. Do this by finding an insn, TRIAL, where
594 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
596 for (trial = insn;
597 PREV_INSN (NEXT_INSN (trial)) == trial;
598 trial = NEXT_INSN (trial))
601 seq_insn = PREV_INSN (NEXT_INSN (trial));
602 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
604 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
605 had_barrier = 1;
607 /* Create a delay list consisting of all the insns other than the one
608 we are deleting (unless we were the only one). */
609 auto_vec<rtx_insn *, 5> delay_list;
610 if (seq->len () > 2)
611 for (i = 1; i < seq->len (); i++)
612 if (seq->insn (i) != insn)
613 add_to_delay_list (seq->insn (i), &delay_list);
615 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
616 list, and rebuild the delay list if non-empty. */
617 prev = PREV_INSN (seq_insn);
618 trial = seq->insn (0);
619 delete_related_insns (seq_insn);
620 add_insn_after (trial, prev, NULL);
 622 /* If there was a barrier after the old SEQUENCE, re-emit it. */
623 if (had_barrier)
624 emit_barrier_after (trial);
 626 /* If there are any delay insns, re-emit them. Otherwise clear the
627 annul flag. */
628 if (!delay_list.is_empty ())
629 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
630 else if (JUMP_P (trial))
631 INSN_ANNULLED_BRANCH_P (trial) = 0;
633 INSN_FROM_TARGET_P (insn) = 0;
635 /* Show we need to fill this insn again. */
636 obstack_ptr_grow (&unfilled_slots_obstack, trial);
638 return trial;
641 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
642 the insn that sets CC0 for it and delete it too. */
644 static void
645 delete_scheduled_jump (rtx_insn *insn)
647 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
648 delete the insn that sets the condition code, but it is hard to find it.
649 Since this case is rare anyway, don't bother trying; there would likely
650 be other insns that became dead anyway, which we wouldn't know to
651 delete. */
653 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
655 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
657 /* If a reg-note was found, it points to an insn to set CC0. This
658 insn is in the delay list of some other insn. So delete it from
659 the delay list it was in. */
660 if (note)
662 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
663 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
664 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
666 else
668 /* The insn setting CC0 is our previous insn, but it may be in
669 a delay slot. It will be the last insn in the delay slot, if
670 it is. */
671 rtx_insn *trial = previous_insn (insn);
672 if (NOTE_P (trial))
673 trial = prev_nonnote_insn (trial);
674 if (sets_cc0_p (PATTERN (trial)) != 1
675 || FIND_REG_INC_NOTE (trial, NULL_RTX))
676 return;
677 if (PREV_INSN (NEXT_INSN (trial)) == trial)
678 delete_related_insns (trial);
679 else
680 delete_from_delay_slot (trial);
684 delete_related_insns (insn);
687 /* Counters for delay-slot filling. */
689 #define NUM_REORG_FUNCTIONS 2
690 #define MAX_DELAY_HISTOGRAM 3
691 #define MAX_REORG_PASSES 2
693 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
695 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
697 static int reorg_pass_number;
699 static void
700 note_delay_statistics (int slots_filled, int index)
702 num_insns_needing_delays[index][reorg_pass_number]++;
703 if (slots_filled > MAX_DELAY_HISTOGRAM)
704 slots_filled = MAX_DELAY_HISTOGRAM;
705 num_filled_delays[index][slots_filled][reorg_pass_number]++;
708 /* Optimize the following cases:
710 1. When a conditional branch skips over only one instruction,
711 use an annulling branch and put that insn in the delay slot.
 712 Use either a branch that annuls when the condition is true or
713 invert the test with a branch that annuls when the condition is
714 false. This saves insns, since otherwise we must copy an insn
715 from the L1 target.
 717    (orig)            (skip)            (otherwise)
 718    Bcc.n L1          Bcc',a L1         Bcc,a L1'
 719    insn              insn              insn2
 720  L1:               L1:               L1:
 721    insn2             insn2             insn2
 722    insn3             insn3           L1':
 723                                        insn3
725 2. When a conditional branch skips over only one instruction,
726 and after that, it unconditionally branches somewhere else,
727 perform the similar optimization. This saves executing the
728 second branch in the case where the inverted condition is true.
 730    Bcc.n L1          Bcc',a L2
 731    insn              insn
 732  L1:               L1:
 733    Bra L2            Bra L2
735 INSN is a JUMP_INSN.
737 This should be expanded to skip over N insns, where N is the number
738 of delay slots required. */
740 static void
741 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
743 rtx_insn *trial = next_nonnote_insn (insn);
744 rtx_insn *next_trial = next_active_insn (trial);
745 int flags;
747 flags = get_jump_flags (insn, JUMP_LABEL (insn));
749 if (trial == 0
750 || !NONJUMP_INSN_P (trial)
751 || GET_CODE (PATTERN (trial)) == SEQUENCE
752 || recog_memoized (trial) < 0
753 || (! eligible_for_annul_false (insn, 0, trial, flags)
754 && ! eligible_for_annul_true (insn, 0, trial, flags))
755 || can_throw_internal (trial))
756 return;
758 /* There are two cases where we are just executing one insn (we assume
759 here that a branch requires only one insn; this should be generalized
760 at some point): Where the branch goes around a single insn or where
761 we have one insn followed by a branch to the same label we branch to.
762 In both of these cases, inverting the jump and annulling the delay
763 slot give the same effect in fewer insns. */
764 if (next_trial == next_active_insn (JUMP_LABEL (insn))
765 || (next_trial != 0
766 && simplejump_or_return_p (next_trial)
767 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
769 if (eligible_for_annul_false (insn, 0, trial, flags))
771 if (invert_jump (insn, JUMP_LABEL (insn), 1))
772 INSN_FROM_TARGET_P (trial) = 1;
773 else if (! eligible_for_annul_true (insn, 0, trial, flags))
774 return;
777 add_to_delay_list (trial, delay_list);
778 next_trial = next_active_insn (trial);
779 update_block (trial, trial);
780 delete_related_insns (trial);
782 /* Also, if we are targeting an unconditional
783 branch, thread our jump to the target of that branch. Don't
784 change this into a RETURN here, because it may not accept what
785 we have in the delay slot. We'll fix this up later. */
786 if (next_trial && simplejump_or_return_p (next_trial))
788 rtx target_label = JUMP_LABEL (next_trial);
789 if (ANY_RETURN_P (target_label))
790 target_label = find_end_label (target_label);
792 if (target_label)
794 /* Recompute the flags based on TARGET_LABEL since threading
795 the jump to TARGET_LABEL may change the direction of the
796 jump (which may change the circumstances in which the
797 delay slot is nullified). */
798 flags = get_jump_flags (insn, target_label);
799 if (eligible_for_annul_true (insn, 0, trial, flags))
800 reorg_redirect_jump (insn, target_label);
804 INSN_ANNULLED_BRANCH_P (insn) = 1;
808 /* Encode and return branch direction and prediction information for
809 INSN assuming it will jump to LABEL.
 811 Non-conditional branches return no direction information and
812 are predicted as very likely taken. */
814 static int
815 get_jump_flags (const rtx_insn *insn, rtx label)
817 int flags;
819 /* get_jump_flags can be passed any insn with delay slots, these may
820 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
821 direction information, and only if they are conditional jumps.
823 If LABEL is a return, then there is no way to determine the branch
824 direction. */
825 if (JUMP_P (insn)
826 && (condjump_p (insn) || condjump_in_parallel_p (insn))
827 && !ANY_RETURN_P (label)
828 && INSN_UID (insn) <= max_uid
829 && INSN_UID (label) <= max_uid)
830 flags
831 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
832 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
833 /* No valid direction information. */
834 else
835 flags = 0;
837 return flags;
840 /* Return truth value of the statement that this branch
841 is mostly taken. If we think that the branch is extremely likely
842 to be taken, we return 2. If the branch is slightly more likely to be
843 taken, return 1. If the branch is slightly less likely to be taken,
844 return 0 and if the branch is highly unlikely to be taken, return -1. */
846 static int
847 mostly_true_jump (rtx jump_insn)
849 /* If branch probabilities are available, then use that number since it
850 always gives a correct answer. */
851 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
852 if (note)
854 int prob = XINT (note, 0);
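       /* With REG_BR_PROB_BASE == 10000, the thresholds below correspond to a
	  predicted probability of the branch being taken of roughly >= 90%,
	  >= 50% and >= 10% respectively.  */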
856 if (prob >= REG_BR_PROB_BASE * 9 / 10)
857 return 2;
858 else if (prob >= REG_BR_PROB_BASE / 2)
859 return 1;
860 else if (prob >= REG_BR_PROB_BASE / 10)
861 return 0;
862 else
863 return -1;
866 /* If there is no note, assume branches are not taken.
867 This should be rare. */
868 return 0;
871 /* Return the condition under which INSN will branch to TARGET. If TARGET
872 is zero, return the condition under which INSN will return. If INSN is
873 an unconditional branch, return const_true_rtx. If INSN isn't a simple
874 type of jump, or it doesn't go to TARGET, return 0. */
876 static rtx
877 get_branch_condition (const rtx_insn *insn, rtx target)
879 rtx pat = PATTERN (insn);
880 rtx src;
882 if (condjump_in_parallel_p (insn))
883 pat = XVECEXP (pat, 0, 0);
885 if (ANY_RETURN_P (pat) && pat == target)
886 return const_true_rtx;
888 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
889 return 0;
891 src = SET_SRC (pat);
892 if (GET_CODE (src) == LABEL_REF && LABEL_REF_LABEL (src) == target)
893 return const_true_rtx;
895 else if (GET_CODE (src) == IF_THEN_ELSE
896 && XEXP (src, 2) == pc_rtx
897 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
898 && LABEL_REF_LABEL (XEXP (src, 1)) == target)
899 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
900 return XEXP (src, 0);
902 else if (GET_CODE (src) == IF_THEN_ELSE
903 && XEXP (src, 1) == pc_rtx
904 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
905 && LABEL_REF_LABEL (XEXP (src, 2)) == target)
906 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
908 enum rtx_code rev;
909 rev = reversed_comparison_code (XEXP (src, 0), insn);
910 if (rev != UNKNOWN)
911 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
912 XEXP (XEXP (src, 0), 0),
913 XEXP (XEXP (src, 0), 1));
916 return 0;
919 /* Return nonzero if CONDITION is more strict than the condition of
920 INSN, i.e., if INSN will always branch if CONDITION is true. */
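/* For example, a condition (eq x y) dominates a branch on (le x y) with the
   same operands: whenever the EQ condition holds, the LE branch is taken.  */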
922 static int
923 condition_dominates_p (rtx condition, const rtx_insn *insn)
925 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
926 enum rtx_code code = GET_CODE (condition);
927 enum rtx_code other_code;
929 if (rtx_equal_p (condition, other_condition)
930 || other_condition == const_true_rtx)
931 return 1;
933 else if (condition == const_true_rtx || other_condition == 0)
934 return 0;
936 other_code = GET_CODE (other_condition);
937 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
938 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
939 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
940 return 0;
942 return comparison_dominates_p (code, other_code);
945 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
946 any insns already in the delay slot of JUMP. */
948 static int
949 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
951 int flags, i;
952 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
954 /* Make sure all the delay slots of this jump would still
955 be valid after threading the jump. If they are still
956 valid, then return nonzero. */
958 flags = get_jump_flags (jump, newlabel);
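   /* For each delay slot insn: if JUMP annuls and the insn came from the
      branch target, it must remain eligible as an annul-false insn; if it
      came from the fall-through path, as an annul-true insn; otherwise it
      must satisfy the ordinary delay-slot constraints.  */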
959 for (i = 1; i < pat->len (); i++)
960 if (! (
961 #if ANNUL_IFFALSE_SLOTS
962 (INSN_ANNULLED_BRANCH_P (jump)
963 && INSN_FROM_TARGET_P (pat->insn (i)))
964 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
965 #endif
966 #if ANNUL_IFTRUE_SLOTS
967 (INSN_ANNULLED_BRANCH_P (jump)
968 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
969 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
970 #endif
971 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
972 break;
974 return (i == pat->len ());
977 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
978 any insns we wish to place in the delay slot of JUMP. */
980 static int
981 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
982 const vec<rtx_insn *> &delay_list)
984 /* Make sure all the insns in DELAY_LIST would still be
985 valid after threading the jump. If they are still
986 valid, then return nonzero. */
988 int flags = get_jump_flags (jump, newlabel);
989 unsigned int delay_insns = delay_list.length ();
990 unsigned int i = 0;
991 for (; i < delay_insns; i++)
992 if (! (
993 #if ANNUL_IFFALSE_SLOTS
994 (INSN_ANNULLED_BRANCH_P (jump)
995 && INSN_FROM_TARGET_P (delay_list[i]))
996 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
997 #endif
998 #if ANNUL_IFTRUE_SLOTS
999 (INSN_ANNULLED_BRANCH_P (jump)
1000 && ! INSN_FROM_TARGET_P (delay_list[i]))
1001 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
1002 #endif
1003 eligible_for_delay (jump, i, delay_list[i], flags)))
1004 break;
1006 return i == delay_insns;
1009 /* DELAY_LIST is a list of insns that have already been placed into delay
1010 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1011 If not, return 0; otherwise return 1. */
1013 static int
1014 check_annul_list_true_false (int annul_true_p,
1015 const vec<rtx_insn *> &delay_list)
1017 rtx_insn *trial;
1018 unsigned int i;
1019 FOR_EACH_VEC_ELT (delay_list, i, trial)
1020 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1021 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1022 return 0;
1024 return 1;
1027 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1028 the condition tested by INSN is CONDITION and the resources shown in
1029 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1030 from SEQ's delay list, in addition to whatever insns it may execute
 1031 (in DELAY_LIST). SETS and NEEDED denote resources already set and
 1032 needed while searching for delay slot insns. On success the stolen insns
 1033 are appended to DELAY_LIST; on failure it is left unchanged.
1035 SLOTS_TO_FILL is the total number of slots required by INSN, and
1036 PSLOTS_FILLED points to the number filled so far (also the number of
1037 insns in DELAY_LIST). It is updated with the number that have been
1038 filled from the SEQUENCE, if any.
1040 PANNUL_P points to a nonzero value if we already know that we need
1041 to annul INSN. If this routine determines that annulling is needed,
1042 it may set that value nonzero.
1044 PNEW_THREAD points to a location that is to receive the place at which
1045 execution should continue. */
1047 static void
1048 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1049 vec<rtx_insn *> *delay_list, resources *sets,
1050 struct resources *needed,
1051 struct resources *other_needed,
1052 int slots_to_fill, int *pslots_filled,
1053 int *pannul_p, rtx *pnew_thread)
1055 int slots_remaining = slots_to_fill - *pslots_filled;
1056 int total_slots_filled = *pslots_filled;
1057 auto_vec<rtx_insn *, 5> new_delay_list;
1058 int must_annul = *pannul_p;
1059 int used_annul = 0;
1060 int i;
1061 struct resources cc_set;
1062 bool *redundant;
1064 /* We can't do anything if there are more delay slots in SEQ than we
1065 can handle, or if we don't know that it will be a taken branch.
1066 We know that it will be a taken branch if it is either an unconditional
1067 branch or a conditional branch with a stricter branch condition.
1069 Also, exit if the branch has more than one set, since then it is computing
1070 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1071 ??? It may be possible to move other sets into INSN in addition to
1072 moving the instructions in the delay slots.
 1074 We cannot steal the delay list if one of the instructions in the
 1075 current delay_list modifies the condition codes and the jump in the
 1076 sequence is a conditional jump. We cannot do this because we cannot
 1077 change the direction of the jump, as the condition codes
 1078 will affect the direction of the jump in the sequence. */
1080 CLEAR_RESOURCE (&cc_set);
1082 rtx_insn *trial;
1083 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1085 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1086 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1087 return;
1090 if (XVECLEN (seq, 0) - 1 > slots_remaining
1091 || ! condition_dominates_p (condition, seq->insn (0))
1092 || ! single_set (seq->insn (0)))
1093 return;
1095 /* On some targets, branches with delay slots can have a limited
1096 displacement. Give the back end a chance to tell us we can't do
1097 this. */
1098 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1099 return;
1101 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1102 for (i = 1; i < seq->len (); i++)
1104 rtx_insn *trial = seq->insn (i);
1105 int flags;
1107 if (insn_references_resource_p (trial, sets, false)
1108 || insn_sets_resource_p (trial, needed, false)
1109 || insn_sets_resource_p (trial, sets, false)
1110 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1111 delay list. */
1112 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1113 /* If TRIAL is from the fallthrough code of an annulled branch insn
1114 in SEQ, we cannot use it. */
1115 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1116 && ! INSN_FROM_TARGET_P (trial)))
1117 return;
1119 /* If this insn was already done (usually in a previous delay slot),
1120 pretend we put it in our delay slot. */
1121 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1122 if (redundant[i])
1123 continue;
1125 /* We will end up re-vectoring this branch, so compute flags
1126 based on jumping to the new label. */
1127 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
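       /* TRIAL may be taken without annulling if we are not already annulling
	  and it is safe to execute whether or not the branch is taken (the
	  branch is unconditional, or TRIAL neither sets resources needed on
	  the other path nor can trap).  Otherwise we fall back to annulling
	  the branch, which is only allowed if we are already annulling or no
	  insns have been accepted yet, and TRIAL is eligible as an annul-false
	  insn consistent with any insns already taken.  */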
1129 if (! must_annul
1130 && ((condition == const_true_rtx
1131 || (! insn_sets_resource_p (trial, other_needed, false)
1132 && ! may_trap_or_fault_p (PATTERN (trial)))))
1133 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1134 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1135 && (must_annul = 1,
1136 check_annul_list_true_false (0, *delay_list)
1137 && check_annul_list_true_false (0, new_delay_list)
1138 && eligible_for_annul_false (insn, total_slots_filled,
1139 trial, flags)))
1141 if (must_annul)
1142 used_annul = 1;
1143 rtx_insn *temp = copy_delay_slot_insn (trial);
1144 INSN_FROM_TARGET_P (temp) = 1;
1145 add_to_delay_list (temp, &new_delay_list);
1146 total_slots_filled++;
1148 if (--slots_remaining == 0)
1149 break;
1151 else
1152 return;
1155 /* Record the effect of the instructions that were redundant and which
1156 we therefore decided not to copy. */
1157 for (i = 1; i < seq->len (); i++)
1158 if (redundant[i])
1159 update_block (seq->insn (i), insn);
1161 /* Show the place to which we will be branching. */
1162 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1164 /* Add any new insns to the delay list and update the count of the
1165 number of slots filled. */
1166 *pslots_filled = total_slots_filled;
1167 if (used_annul)
1168 *pannul_p = 1;
1170 rtx_insn *temp;
1171 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1172 add_to_delay_list (temp, delay_list);
1175 /* Similar to steal_delay_list_from_target except that SEQ is on the
1176 fallthrough path of INSN. Here we only do something if the delay insn
1177 of SEQ is an unconditional branch. In that case we steal its delay slot
1178 for INSN since unconditional branches are much easier to fill. */
1180 static void
1181 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1182 rtx_sequence *seq,
1183 vec<rtx_insn *> *delay_list,
1184 struct resources *sets,
1185 struct resources *needed,
1186 struct resources *other_needed,
1187 int slots_to_fill, int *pslots_filled,
1188 int *pannul_p)
1190 int i;
1191 int flags;
1192 int must_annul = *pannul_p;
1193 int used_annul = 0;
1195 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1197 /* We can't do anything if SEQ's delay insn isn't an
1198 unconditional branch. */
1200 if (! simplejump_or_return_p (seq->insn (0)))
1201 return;
1203 for (i = 1; i < seq->len (); i++)
1205 rtx_insn *trial = seq->insn (i);
1207 /* If TRIAL sets CC0, stealing it will move it too far from the use
1208 of CC0. */
1209 if (insn_references_resource_p (trial, sets, false)
1210 || insn_sets_resource_p (trial, needed, false)
1211 || insn_sets_resource_p (trial, sets, false)
1212 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1214 break;
1216 /* If this insn was already done, we don't need it. */
1217 if (redundant_insn (trial, insn, *delay_list))
1219 update_block (trial, insn);
1220 delete_from_delay_slot (trial);
1221 continue;
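      /* As in steal_delay_list_from_target, TRIAL must either be safe to
	 execute whether or not the branch is taken or be taken by annulling
	 the branch (here as an annul-true insn), consistently with the insns
	 already in the delay list.  */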
1224 if (! must_annul
1225 && ((condition == const_true_rtx
1226 || (! insn_sets_resource_p (trial, other_needed, false)
1227 && ! may_trap_or_fault_p (PATTERN (trial)))))
1228 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1229 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1230 check_annul_list_true_false (1, *delay_list)
1231 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1233 if (must_annul)
1234 used_annul = 1;
1235 delete_from_delay_slot (trial);
1236 add_to_delay_list (trial, delay_list);
1238 if (++(*pslots_filled) == slots_to_fill)
1239 break;
1241 else
1242 break;
1245 if (used_annul)
1246 *pannul_p = 1;
1249 /* Try merging insns starting at THREAD which match exactly the insns in
1250 INSN's delay list.
1252 If all insns were matched and the insn was previously annulling, the
1253 annul bit will be cleared.
1255 For each insn that is merged, if the branch is or will be non-annulling,
1256 we delete the merged insn. */
1258 static void
1259 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1261 rtx_insn *trial, *next_trial;
1262 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1263 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1264 int slot_number = 1;
1265 int num_slots = XVECLEN (PATTERN (insn), 0);
1266 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1267 struct resources set, needed, modified;
1268 rtx_insn_list *merged_insns = 0;
1269 int i, j;
1270 int flags;
1272 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1274 CLEAR_RESOURCE (&needed);
1275 CLEAR_RESOURCE (&set);
1277 /* If this is not an annulling branch, take into account anything needed in
1278 INSN's delay slot. This prevents two increments from being incorrectly
1279 folded into one. If we are annulling, this would be the correct
1280 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1281 will essentially disable this optimization. This method is somewhat of
1282 a kludge, but I don't see a better way.) */
1283 if (! annul_p)
1284 for (i = 1 ; i < num_slots; i++)
1285 if (XVECEXP (PATTERN (insn), 0, i))
1286 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1287 true);
1289 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1291 rtx pat = PATTERN (trial);
1292 rtx oldtrial = trial;
1294 next_trial = next_nonnote_insn (trial);
1296 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1297 if (NONJUMP_INSN_P (trial)
1298 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1299 continue;
1301 if (GET_CODE (next_to_match) == GET_CODE (trial)
1302 /* We can't share an insn that sets cc0. */
1303 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1304 && ! insn_references_resource_p (trial, &set, true)
1305 && ! insn_sets_resource_p (trial, &set, true)
1306 && ! insn_sets_resource_p (trial, &needed, true)
1307 && (trial = try_split (pat, trial, 0)) != 0
1308 /* Update next_trial, in case try_split succeeded. */
1309 && (next_trial = next_nonnote_insn (trial))
1310 /* Likewise THREAD. */
1311 && (thread = oldtrial == thread ? trial : thread)
1312 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1313 /* Have to test this condition if annul condition is different
1314 from (and less restrictive than) non-annulling one. */
1315 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1318 if (! annul_p)
1320 update_block (trial, thread);
1321 if (trial == thread)
1322 thread = next_active_insn (thread);
1324 delete_related_insns (trial);
1325 INSN_FROM_TARGET_P (next_to_match) = 0;
1327 else
1328 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1330 if (++slot_number == num_slots)
1331 break;
1333 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1336 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1337 mark_referenced_resources (trial, &needed, true);
1340 /* See if we stopped on a filled insn. If we did, try to see if its
1341 delay slots match. */
1342 if (slot_number != num_slots
1343 && trial && NONJUMP_INSN_P (trial)
1344 && GET_CODE (PATTERN (trial)) == SEQUENCE
1345 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1346 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1348 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1349 rtx filled_insn = XVECEXP (pat, 0, 0);
1351 /* Account for resources set/needed by the filled insn. */
1352 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1353 mark_referenced_resources (filled_insn, &needed, true);
1355 for (i = 1; i < pat->len (); i++)
1357 rtx_insn *dtrial = pat->insn (i);
1359 CLEAR_RESOURCE (&modified);
1360 /* Account for resources set by the insn following NEXT_TO_MATCH
1361 inside INSN's delay list. */
1362 for (j = 1; slot_number + j < num_slots; j++)
1363 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1364 &modified, 0, MARK_SRC_DEST_CALL);
1365 /* Account for resources set by the insn before DTRIAL and inside
1366 TRIAL's delay list. */
1367 for (j = 1; j < i; j++)
1368 mark_set_resources (XVECEXP (pat, 0, j),
1369 &modified, 0, MARK_SRC_DEST_CALL);
1370 if (! insn_references_resource_p (dtrial, &set, true)
1371 && ! insn_sets_resource_p (dtrial, &set, true)
1372 && ! insn_sets_resource_p (dtrial, &needed, true)
1373 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1374 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
 1375 /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
 1376 resource modified between them (only dtrial is checked because
 1377 next_to_match and dtrial must be equal in order to hit
1378 this line) */
1379 && ! insn_references_resource_p (dtrial, &modified, true)
1380 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1382 if (! annul_p)
1384 rtx_insn *new_rtx;
1386 update_block (dtrial, thread);
1387 new_rtx = delete_from_delay_slot (dtrial);
1388 if (thread->deleted ())
1389 thread = new_rtx;
1390 INSN_FROM_TARGET_P (next_to_match) = 0;
1392 else
1393 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1394 merged_insns);
1396 if (++slot_number == num_slots)
1397 break;
1399 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1401 else
1403 /* Keep track of the set/referenced resources for the delay
1404 slots of any trial insns we encounter. */
1405 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1406 mark_referenced_resources (dtrial, &needed, true);
1411 /* If all insns in the delay slot have been matched and we were previously
 1412 annulling the branch, we need not do so any more. In that case delete all the
1413 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1414 the delay list so that we know that it isn't only being used at the
1415 target. */
1416 if (slot_number == num_slots && annul_p)
1418 for (; merged_insns; merged_insns = merged_insns->next ())
1420 if (GET_MODE (merged_insns) == SImode)
1422 rtx_insn *new_rtx;
1424 update_block (merged_insns->insn (), thread);
1425 new_rtx = delete_from_delay_slot (merged_insns->insn ());
1426 if (thread->deleted ())
1427 thread = new_rtx;
1429 else
1431 update_block (merged_insns->insn (), thread);
1432 delete_related_insns (merged_insns->insn ());
1436 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1438 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1439 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1443 /* See if INSN is redundant with an insn in front of TARGET. Often this
1444 is called when INSN is a candidate for a delay slot of TARGET.
1445 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1446 of INSN. Often INSN will be redundant with an insn in a delay slot of
1447 some previous insn. This happens when we have a series of branches to the
1448 same label; in that case the first insn at the target might want to go
1449 into each of the delay slots.
1451 If we are not careful, this routine can take up a significant fraction
1452 of the total compilation time (4%), but only wins rarely. Hence we
1453 speed this routine up by making two passes. The first pass goes back
1454 until it hits a label and sees if it finds an insn with an identical
1455 pattern. Only in this (relatively rare) event does it check for
1456 data conflicts.
1458 We do not split insns we encounter. This could cause us not to find a
1459 redundant insn, but the cost of splitting seems greater than the possible
1460 gain in rare cases. */
1462 static rtx
1463 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1465 rtx target_main = target;
1466 rtx ipat = PATTERN (insn);
1467 rtx_insn *trial;
1468 rtx pat;
1469 struct resources needed, set;
1470 int i;
1471 unsigned insns_to_search;
1473 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1474 are allowed to not actually assign to such a register. */
1475 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1476 return 0;
1478 /* Scan backwards looking for a match. */
1479 for (trial = PREV_INSN (target),
1480 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1481 trial && insns_to_search > 0;
1482 trial = PREV_INSN (trial))
1484 /* (use (insn))s can come immediately after a barrier if the
1485 label that used to precede them has been deleted as dead.
1486 See delete_related_insns. */
1487 if (LABEL_P (trial) || BARRIER_P (trial))
1488 return 0;
1490 if (!INSN_P (trial))
1491 continue;
1492 --insns_to_search;
1494 pat = PATTERN (trial);
1495 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1496 continue;
1498 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1500 /* Stop for a CALL and its delay slots because it is difficult to
1501 track its resource needs correctly. */
1502 if (CALL_P (seq->element (0)))
1503 return 0;
1505 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1506 slots because it is difficult to track its resource needs
1507 correctly. */
1509 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1510 return 0;
1512 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1513 return 0;
1515 /* See if any of the insns in the delay slot match, updating
1516 resource requirements as we go. */
1517 for (i = seq->len () - 1; i > 0; i--)
1518 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1519 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1520 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1521 break;
1523 /* If found a match, exit this loop early. */
1524 if (i > 0)
1525 break;
1528 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1529 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1530 break;
1533 /* If we didn't find an insn that matches, return 0. */
1534 if (trial == 0)
1535 return 0;
1537 /* See what resources this insn sets and needs. If they overlap, or
1538 if this insn references CC0, it can't be redundant. */
1540 CLEAR_RESOURCE (&needed);
1541 CLEAR_RESOURCE (&set);
1542 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1543 mark_referenced_resources (insn, &needed, true);
1545 /* If TARGET is a SEQUENCE, get the main insn. */
1546 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1547 target_main = XVECEXP (PATTERN (target), 0, 0);
1549 if (resource_conflicts_p (&needed, &set)
1550 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1551 /* The insn requiring the delay may not set anything needed or set by
1552 INSN. */
1553 || insn_sets_resource_p (target_main, &needed, true)
1554 || insn_sets_resource_p (target_main, &set, true))
1555 return 0;
1557 /* Insns we pass may not set either NEEDED or SET, so merge them for
1558 simpler tests. */
1559 needed.memory |= set.memory;
1560 IOR_HARD_REG_SET (needed.regs, set.regs);
1562 /* This insn isn't redundant if it conflicts with an insn that either is
1563 or will be in a delay slot of TARGET. */
1565 unsigned int j;
1566 rtx_insn *temp;
1567 FOR_EACH_VEC_ELT (delay_list, j, temp)
1568 if (insn_sets_resource_p (temp, &needed, true))
1569 return 0;
1571 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1572 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1573 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1574 true))
1575 return 0;
1577 /* Scan backwards until we reach a label or an insn that uses something
 1578 INSN sets or sets something INSN uses or sets. */
1580 for (trial = PREV_INSN (target),
1581 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1582 trial && !LABEL_P (trial) && insns_to_search > 0;
1583 trial = PREV_INSN (trial))
1585 if (!INSN_P (trial))
1586 continue;
1587 --insns_to_search;
1589 pat = PATTERN (trial);
1590 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1591 continue;
1593 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1595 bool annul_p = false;
1596 rtx_insn *control = seq->insn (0);
1598 /* If this is a CALL_INSN and its delay slots, it is hard to track
1599 the resource needs properly, so give up. */
1600 if (CALL_P (control))
1601 return 0;
1603 /* If this is an INSN or JUMP_INSN with delayed effects, it
1604 is hard to track the resource needs properly, so give up. */
1606 if (INSN_SETS_ARE_DELAYED (control))
1607 return 0;
1609 if (INSN_REFERENCES_ARE_DELAYED (control))
1610 return 0;
1612 if (JUMP_P (control))
1613 annul_p = INSN_ANNULLED_BRANCH_P (control);
1615 /* See if any of the insns in the delay slot match, updating
1616 resource requirements as we go. */
1617 for (i = seq->len () - 1; i > 0; i--)
1619 rtx candidate = seq->element (i);
1621 /* If an insn will be annulled if the branch is false, it isn't
1622 considered as a possible duplicate insn. */
1623 if (rtx_equal_p (PATTERN (candidate), ipat)
1624 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1626 /* Show that this insn will be used in the sequel. */
1627 INSN_FROM_TARGET_P (candidate) = 0;
1628 return candidate;
1631 /* Unless this is an annulled insn from the target of a branch,
1632 we must stop if it sets anything needed or set by INSN. */
1633 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1634 && insn_sets_resource_p (candidate, &needed, true))
1635 return 0;
1638 /* If the insn requiring the delay slot conflicts with INSN, we
1639 must stop. */
1640 if (insn_sets_resource_p (control, &needed, true))
1641 return 0;
1643 else
1645 /* See if TRIAL is the same as INSN. */
1646 pat = PATTERN (trial);
1647 if (rtx_equal_p (pat, ipat))
1648 return trial;
1650 /* Can't go any further if TRIAL conflicts with INSN. */
1651 if (insn_sets_resource_p (trial, &needed, true))
1652 return 0;
1656 return 0;
1659 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1660 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1661 is nonzero, we are allowed to fall into this thread; otherwise, we are
1662 not.
1664 If LABEL is used more than once or we pass a label other than LABEL before
1665 finding an active insn, we do not own this thread. */
1667 static int
1668 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1670 rtx_insn *active_insn;
1671 rtx_insn *insn;
1673 /* We don't own the function end. */
1674 if (thread == 0 || ANY_RETURN_P (thread))
1675 return 0;
1677 /* We have a non-NULL insn. */
1678 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1680 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1681 active_insn = next_active_insn (PREV_INSN (thread_insn));
1683 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1684 if (LABEL_P (insn)
1685 && (insn != label || LABEL_NUSES (insn) != 1))
1686 return 0;
1688 if (allow_fallthrough)
1689 return 1;
1691 /* Ensure that we reach a BARRIER before any insn or label. */
1692 for (insn = prev_nonnote_insn (thread_insn);
1693 insn == 0 || !BARRIER_P (insn);
1694 insn = prev_nonnote_insn (insn))
1695 if (insn == 0
1696 || LABEL_P (insn)
1697 || (NONJUMP_INSN_P (insn)
1698 && GET_CODE (PATTERN (insn)) != USE
1699 && GET_CODE (PATTERN (insn)) != CLOBBER))
1700 return 0;
1702 return 1;
1705 /* Called when INSN is being moved from a location near the target of a jump.
1706 We leave a marker of the form (use (INSN)) immediately in front
1707 of WHERE for mark_target_live_regs. These markers will be deleted when
1708 reorg finishes.
1710 We used to try to update the live status of registers if WHERE is at
1711 the start of a basic block, but that can't work since we may remove a
1712 BARRIER in relax_delay_slots. */
1714 static void
1715 update_block (rtx_insn *insn, rtx where)
1717 /* Ignore if this was in a delay slot and it came from the target of
1718 a branch. */
1719 if (INSN_FROM_TARGET_P (insn))
1720 return;
1722 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1724 /* INSN might be making a value live in a block where it didn't use to
1725 be. So recompute liveness information for this block. */
1727 incr_ticks_for_insn (insn);
1730 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1731 the basic block containing the jump. */
1733 static int
1734 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1736 incr_ticks_for_insn (jump);
1737 return redirect_jump (jump, nlabel, 1);
1740 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1741 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1742 that reference values used in INSN. If we find one, then we move the
1743 REG_DEAD note to INSN.
1745 This is needed to handle the case where a later insn (after INSN) has a
1746 REG_DEAD note for a register used by INSN, and this later insn subsequently
1747 gets moved before a CODE_LABEL because it is a redundant insn. In this
1748 case, mark_target_live_regs may be confused into thinking the register
1749 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
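/* Illustrative sketch (hypothetical register): suppose INSN uses r4 and a
   later insn P carries a REG_DEAD note for r4.  If P is subsequently moved
   in front of a CODE_LABEL because it is redundant, mark_target_live_regs
   would see the REG_DEAD note immediately before that label and wrongly
   treat r4 as dead; moving the note onto INSN here avoids that.  */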
1751 static void
1752 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1754 rtx link, next;
1755 rtx_insn *p;
1757 for (p = next_nonnote_insn (insn); p != delayed_insn;
1758 p = next_nonnote_insn (p))
1759 for (link = REG_NOTES (p); link; link = next)
1761 next = XEXP (link, 1);
1763 if (REG_NOTE_KIND (link) != REG_DEAD
1764 || !REG_P (XEXP (link, 0)))
1765 continue;
1767 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1769 /* Move the REG_DEAD note from P to INSN. */
1770 remove_note (p, link);
1771 XEXP (link, 1) = REG_NOTES (insn);
1772 REG_NOTES (insn) = link;
1777 /* Called when an insn redundant with start_insn is deleted. If there
1778 is a REG_DEAD note for the target of start_insn between start_insn
1779 and stop_insn, then the REG_DEAD note needs to be deleted since the
1780 value no longer dies there.
1782 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1783 confused into thinking the register is dead. */
1785 static void
1786 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1788 rtx link, next;
1789 rtx_insn *p;
1791 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1792 p = next_nonnote_insn (p))
1793 for (link = REG_NOTES (p); link; link = next)
1795 next = XEXP (link, 1);
1797 if (REG_NOTE_KIND (link) != REG_DEAD
1798 || !REG_P (XEXP (link, 0)))
1799 continue;
1801 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1803 remove_note (p, link);
1804 return;
1809 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1811 This handles the case of udivmodXi4 instructions which optimize their
1812 output depending on whether any REG_UNUSED notes are present.
1813 We must make sure that INSN calculates as many results as REDUNDANT_INSN
1814 does. */
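/* A concrete (illustrative) case: if REDUNDANT_INSN computes both the
   quotient and the remainder of a udivmodXi4 while INSN carries a
   REG_UNUSED note for the remainder register, that note is removed so INSN
   is not later optimized into producing only the quotient.  */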
1816 static void
1817 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1819 rtx link, next;
1821 for (link = REG_NOTES (insn); link; link = next)
1823 next = XEXP (link, 1);
1825 if (REG_NOTE_KIND (link) != REG_UNUSED
1826 || !REG_P (XEXP (link, 0)))
1827 continue;
1829 if (! find_regno_note (redundant_insn, REG_UNUSED,
1830 REGNO (XEXP (link, 0))))
1831 remove_note (insn, link);
1835 static vec <rtx> sibling_labels;
1837 /* Return the label before INSN, or put a new label there. If SIBLING is
1838 non-zero, it is another label associated with the new label (if any),
1839 typically the former target of the jump that will be redirected to
1840 the new label. */
1842 static rtx_insn *
1843 get_label_before (rtx_insn *insn, rtx sibling)
1845 rtx_insn *label;
1847 /* Find an existing label at this point
1848 or make a new one if there is none. */
1849 label = prev_nonnote_insn (insn);
1851 if (label == 0 || !LABEL_P (label))
1853 rtx_insn *prev = PREV_INSN (insn);
1855 label = gen_label_rtx ();
1856 emit_label_after (label, prev);
1857 LABEL_NUSES (label) = 0;
1858 if (sibling)
1860 sibling_labels.safe_push (label);
1861 sibling_labels.safe_push (sibling);
1864 return label;
1867 /* Scan a function looking for insns that need a delay slot and find insns to
1868 put into the delay slot.
1870 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1871 as calls). We do these first since we don't want jump insns (that are
1872 easier to fill) to get the only insns that could be used for non-jump insns.
1873 When it is zero, only try to fill JUMP_INSNs.
1875 When slots are filled in this manner, the insns (including the
1876 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1877 it is possible to tell whether a delay slot has really been filled
1878 or not. `final' knows how to deal with this, by communicating
1879 through FINAL_SEQUENCE. */
1881 static void
1882 fill_simple_delay_slots (int non_jumps_p)
1884 rtx_insn *insn, *trial, *next_trial;
1885 rtx pat;
1886 int i;
1887 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1888 struct resources needed, set;
1889 int slots_to_fill, slots_filled;
1890 auto_vec<rtx_insn *, 5> delay_list;
1892 for (i = 0; i < num_unfilled_slots; i++)
1894 int flags;
1895 /* Get the next insn to fill. If it has already had any slots assigned,
1896 we can't do anything with it. Maybe we'll improve this later. */
1898 insn = unfilled_slots_base[i];
1899 if (insn == 0
1900 || insn->deleted ()
1901 || (NONJUMP_INSN_P (insn)
1902 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1903 || (JUMP_P (insn) && non_jumps_p)
1904 || (!JUMP_P (insn) && ! non_jumps_p))
1905 continue;
1907 /* It may have been that this insn used to need delay slots, but
1908 now doesn't; ignore in that case. This can happen, for example,
1909 on the HP PA RISC, where the number of delay slots depends on
1910 what insns are nearby. */
1911 slots_to_fill = num_delay_slots (insn);
1913 /* Some machine descriptions have defined instructions to have
1914 delay slots only in certain circumstances which may depend on
1915 nearby insns (which change due to reorg's actions).
1917 For example, the PA port normally has delay slots for unconditional
1918 jumps.
1920 However, the PA port claims such jumps do not have a delay slot
1921 if they are immediate successors of certain CALL_INSNs. This
1922 allows the port to favor filling the delay slot of the call with
1923 the unconditional jump. */
1924 if (slots_to_fill == 0)
1925 continue;
1927 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1928 says how many. After initialization, first try optimizing
1930 call _foo call _foo
1931 nop add %o7,.-L1,%o7
1932 b,a L1
1935 If this case applies, the delay slot of the call is filled with
1936 the unconditional jump. This is done first to avoid having the
1937 delay slot of the call filled in the backward scan. Also, since
1938 the unconditional jump is likely to also have a delay slot, that
1939 insn must exist when it is subsequently scanned.
1941 This is tried on each insn with delay slots as some machines
1942 have insns which perform calls, but are not represented as
1943 CALL_INSNs. */
1945 slots_filled = 0;
1946 delay_list.truncate (0);
1948 if (JUMP_P (insn))
1949 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1950 else
1951 flags = get_jump_flags (insn, NULL_RTX);
1953 if ((trial = next_active_insn (insn))
1954 && JUMP_P (trial)
1955 && simplejump_p (trial)
1956 && eligible_for_delay (insn, slots_filled, trial, flags)
1957 && no_labels_between_p (insn, trial)
1958 && ! can_throw_internal (trial))
1960 rtx_insn **tmp;
1961 slots_filled++;
1962 add_to_delay_list (trial, &delay_list);
1964 /* TRIAL may have had its delay slot filled, then unfilled. When
1965 the delay slot is unfilled, TRIAL is placed back on the unfilled
1966 slots obstack. Unfortunately, it is placed on the end of the
1967 obstack, not in its original location. Therefore, we must search
1968 from entry i + 1 to the end of the unfilled slots obstack to
1969 try and find TRIAL. */
1970 tmp = &unfilled_slots_base[i + 1];
1971 while (*tmp != trial && tmp != unfilled_slots_next)
1972 tmp++;
1974 /* Remove the unconditional jump from consideration for delay slot
1975 filling and unthread it. */
1976 if (*tmp == trial)
1977 *tmp = 0;
1979 rtx_insn *next = NEXT_INSN (trial);
1980 rtx_insn *prev = PREV_INSN (trial);
1981 if (prev)
1982 SET_NEXT_INSN (prev) = next;
1983 if (next)
1984 SET_PREV_INSN (next) = prev;
1988 /* Now, scan backwards from the insn to search for a potential
1989 delay-slot candidate. Stop searching when a label or jump is hit.
1991 For each candidate, if it is to go into the delay slot (moved
1992 forward in execution sequence), it must not need or set any resources
1993 that were set by later insns and must not set any resources that
1994 are needed for those insns.
1996 The delay slot insn itself sets resources unless it is a call
1997 (in which case the called routine, not the insn itself, is doing
1998 the setting). */
2000 if (slots_filled < slots_to_fill)
2002 /* If the flags register is dead after the insn, then we want to be
2003 able to accept a candidate that clobbers it. For this purpose,
2004 we need to filter the flags register during life analysis, so
2005 that it doesn't create RAW and WAW dependencies, while still
2006 creating the necessary WAR dependencies. */
2007 bool filter_flags
2008 = (slots_to_fill == 1
2009 && targetm.flags_regnum != INVALID_REGNUM
2010 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2011 struct resources fset;
2012 CLEAR_RESOURCE (&needed);
2013 CLEAR_RESOURCE (&set);
2014 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2015 if (filter_flags)
2017 CLEAR_RESOURCE (&fset);
2018 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2020 mark_referenced_resources (insn, &needed, false);
2022 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2023 trial = next_trial)
2025 next_trial = prev_nonnote_insn (trial);
2027 /* This must be an INSN or CALL_INSN. */
2028 pat = PATTERN (trial);
2030 /* Stand-alone USE and CLOBBER are just for flow. */
2031 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2032 continue;
2034 /* Check for resource conflict first, to avoid unnecessary
2035 splitting. */
2036 if (! insn_references_resource_p (trial, &set, true)
2037 && ! insn_sets_resource_p (trial,
2038 filter_flags ? &fset : &set,
2039 true)
2040 && ! insn_sets_resource_p (trial, &needed, true)
2041 /* Can't separate set of cc0 from its use. */
2042 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2043 && ! can_throw_internal (trial))
2045 trial = try_split (pat, trial, 1);
2046 next_trial = prev_nonnote_insn (trial);
2047 if (eligible_for_delay (insn, slots_filled, trial, flags))
2049 /* In this case, we are searching backward, so if we
2050 find insns to put on the delay list, we want
2051 to put them at the head, rather than the
2052 tail, of the list. */
2054 update_reg_dead_notes (trial, insn);
2055 delay_list.safe_insert (0, trial);
2056 update_block (trial, trial);
2057 delete_related_insns (trial);
2058 if (slots_to_fill == ++slots_filled)
2059 break;
2060 continue;
2064 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2065 if (filter_flags)
2067 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2068 /* If the flags register is set, then it doesn't create RAW
2069 dependencies any longer and it also doesn't create WAW
2070 dependencies since it's dead after the original insn. */
2071 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2073 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2074 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2077 mark_referenced_resources (trial, &needed, true);
2081 /* If all needed slots haven't been filled, we come here. */
2083 /* Try to optimize case of jumping around a single insn. */
2084 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2085 && slots_filled != slots_to_fill
2086 && delay_list.is_empty ()
2087 && JUMP_P (insn)
2088 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2089 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2091 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2092 if (!delay_list.is_empty ())
2093 slots_filled += 1;
2096 /* Try to get insns from beyond the insn needing the delay slot.
2097 These insns can neither set nor reference resources set in insns being
2098 skipped, cannot set resources in the insn being skipped, and, if this
2099 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2100 call might not return).
2102 There used to be code which continued past the target label if
2103 we saw all uses of the target label. This code did not work,
2104 because it failed to account for some instructions which were
2105 both annulled and marked as from the target. This can happen as a
2106 result of optimize_skip. Since this code was redundant with
2107 fill_eager_delay_slots anyways, it was just deleted. */
2109 if (slots_filled != slots_to_fill
2110 /* If this instruction could throw an exception which is
2111 caught in the same function, then it's not safe to fill
2112 the delay slot with an instruction from beyond this
2113 point. For example, consider:
2115 int i = 2;
2117 try {
2118 f();
2119 i = 3;
2120 } catch (...) {}
2122 return i;
2124 Even though `i' is a local variable, we must be sure not
2125 to put `i = 3' in the delay slot if `f' might throw an
2126 exception.
2128 Presumably, we should also check to see if we could get
2129 back to this function via `setjmp'. */
2130 && ! can_throw_internal (insn)
2131 && !JUMP_P (insn))
2133 int maybe_never = 0;
2134 rtx pat, trial_delay;
2136 CLEAR_RESOURCE (&needed);
2137 CLEAR_RESOURCE (&set);
2138 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2139 mark_referenced_resources (insn, &needed, true);
2141 if (CALL_P (insn))
2142 maybe_never = 1;
2144 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2145 trial = next_trial)
2147 next_trial = next_nonnote_insn (trial);
2149 /* This must be an INSN or CALL_INSN. */
2150 pat = PATTERN (trial);
2152 /* Stand-alone USE and CLOBBER are just for flow. */
2153 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2154 continue;
2156 /* If this already has filled delay slots, get the insn needing
2157 the delay slots. */
2158 if (GET_CODE (pat) == SEQUENCE)
2159 trial_delay = XVECEXP (pat, 0, 0);
2160 else
2161 trial_delay = trial;
2163 /* Stop our search when seeing a jump. */
2164 if (JUMP_P (trial_delay))
2165 break;
2167 /* See if we have a resource problem before we try to split. */
2168 if (GET_CODE (pat) != SEQUENCE
2169 && ! insn_references_resource_p (trial, &set, true)
2170 && ! insn_sets_resource_p (trial, &set, true)
2171 && ! insn_sets_resource_p (trial, &needed, true)
2172 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2173 && ! (maybe_never && may_trap_or_fault_p (pat))
2174 && (trial = try_split (pat, trial, 0))
2175 && eligible_for_delay (insn, slots_filled, trial, flags)
2176 && ! can_throw_internal (trial))
2178 next_trial = next_nonnote_insn (trial);
2179 add_to_delay_list (trial, &delay_list);
2180 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2181 link_cc0_insns (trial);
2183 delete_related_insns (trial);
2184 if (slots_to_fill == ++slots_filled)
2185 break;
2186 continue;
2189 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2190 mark_referenced_resources (trial, &needed, true);
2192 /* Ensure we don't put insns between the setting of cc and the
2193 comparison by moving a setting of cc into an earlier delay
2194 slot since these insns could clobber the condition code. */
2195 set.cc = 1;
2197 /* If this is a call, we might not get here. */
2198 if (CALL_P (trial_delay))
2199 maybe_never = 1;
2202 /* If there are slots left to fill and our search was stopped by an
2203 unconditional branch, try the insn at the branch target. We can
2204 redirect the branch if it works.
2206 Don't do this if the insn at the branch target is a branch. */
2207 if (slots_to_fill != slots_filled
2208 && trial
2209 && jump_to_label_p (trial)
2210 && simplejump_p (trial)
2211 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2212 && ! (NONJUMP_INSN_P (next_trial)
2213 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2214 && !JUMP_P (next_trial)
2215 && ! insn_references_resource_p (next_trial, &set, true)
2216 && ! insn_sets_resource_p (next_trial, &set, true)
2217 && ! insn_sets_resource_p (next_trial, &needed, true)
2218 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2219 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2220 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2221 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2222 && ! can_throw_internal (trial))
2224 /* See comment in relax_delay_slots about necessity of using
2225 next_real_insn here. */
2226 rtx_insn *new_label = next_real_insn (next_trial);
2228 if (new_label != 0)
2229 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2230 else
2231 new_label = find_end_label (simple_return_rtx);
2233 if (new_label)
2235 add_to_delay_list (copy_delay_slot_insn (next_trial),
2236 &delay_list);
2237 slots_filled++;
2238 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2239 new_label);
2244 /* If this is an unconditional jump, then try to get insns from the
2245 target of the jump. */
2246 rtx_jump_insn *jump_insn;
2247 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2248 && simplejump_p (jump_insn)
2249 && slots_filled != slots_to_fill)
2250 fill_slots_from_thread (jump_insn, const_true_rtx,
2251 next_active_insn (JUMP_LABEL (insn)), NULL, 1,
2252 1, own_thread_p (JUMP_LABEL (insn),
2253 JUMP_LABEL (insn), 0),
2254 slots_to_fill, &slots_filled, &delay_list);
2256 if (!delay_list.is_empty ())
2257 unfilled_slots_base[i]
2258 = emit_delay_sequence (insn, delay_list, slots_filled);
2260 if (slots_to_fill == slots_filled)
2261 unfilled_slots_base[i] = 0;
2263 note_delay_statistics (slots_filled, 0);
2267 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2268 return the ultimate label reached by any such chain of jumps.
2269 Return a suitable return rtx if the chain ultimately leads to a
2270 return instruction.
2271 If LABEL is not followed by a jump, return LABEL.
2272 If the chain loops or we can't find the end, return LABEL,
2273 since that tells the caller to avoid changing the insn.
2274 If the returned label is obtained by following a crossing jump,
2275 set *CROSSING to true, otherwise set it to false. */
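/* For example (labels purely illustrative): if LABEL begins "L1: b L2" and
   L2 begins "L2: b L3", following the chain yields L3, so JUMP can be
   redirected straight to L3.  */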
2277 static rtx
2278 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2280 rtx_insn *insn;
2281 rtx_insn *next;
2282 int depth;
2284 *crossing = false;
2285 if (ANY_RETURN_P (label))
2286 return label;
2288 rtx_insn *value = as_a <rtx_insn *> (label);
2290 for (depth = 0;
2291 (depth < 10
2292 && (insn = next_active_insn (value)) != 0
2293 && JUMP_P (insn)
2294 && JUMP_LABEL (insn) != NULL_RTX
2295 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2296 || ANY_RETURN_P (PATTERN (insn)))
2297 && (next = NEXT_INSN (insn))
2298 && BARRIER_P (next));
2299 depth++)
2301 rtx this_label_or_return = JUMP_LABEL (insn);
2303 /* If we have found a cycle, make the insn jump to itself. */
2304 if (this_label_or_return == label)
2305 return label;
2307 /* Cannot follow returns and cannot look through tablejumps. */
2308 if (ANY_RETURN_P (this_label_or_return))
2309 return this_label_or_return;
2311 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2312 if (NEXT_INSN (this_label)
2313 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2314 break;
2316 if (!targetm.can_follow_jump (jump, insn))
2317 break;
2318 if (!*crossing)
2319 *crossing = CROSSING_JUMP_P (jump);
2320 value = this_label;
2322 if (depth == 10)
2323 return label;
2324 return value;
2327 /* Try to find insns to place in delay slots.
2329 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2330 or is an unconditional branch if CONDITION is const_true_rtx.
2331 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2333 THREAD is a flow of control: the insns to be executed either if the
2334 branch is true or if it is false; THREAD_IF_TRUE says which.
2336 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2337 to see if any potential delay slot insns set things needed there.
2339 LIKELY is nonzero if it is extremely likely that the branch will be
2340 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2341 end of a loop back up to the top.
2343 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2344 thread. I.e., it is the fallthrough code of our jump or the target of the
2345 jump when we are the only jump going there.
2347 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2348 case, we can only take insns from the head of the thread for our delay
2349 slot. We then adjust the jump to point after the insns we have taken. */
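/* Sketch of the !OWN_THREAD case (hypothetical insns): if the branch target
   starts with "r2 <- r2 + 4; r5 <- mem[r2]" and only the first insn fits,
   a copy of "r2 <- r2 + 4" goes into the delay slot and the branch is
   redirected to a label placed just after that insn at the target.  */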
2351 static void
2352 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2353 rtx thread_or_return, rtx opposite_thread, int likely,
2354 int thread_if_true, int own_thread, int slots_to_fill,
2355 int *pslots_filled, vec<rtx_insn *> *delay_list)
2357 rtx new_thread;
2358 struct resources opposite_needed, set, needed;
2359 rtx_insn *trial;
2360 int lose = 0;
2361 int must_annul = 0;
2362 int flags;
2364 /* Validate our arguments. */
2365 gcc_assert (condition != const_true_rtx || thread_if_true);
2366 gcc_assert (own_thread || thread_if_true);
2368 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2370 /* If our thread is the end of the subroutine, we can't get any delay
2371 insns from that. */
2372 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2373 return;
2375 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2377 /* If this is an unconditional branch, nothing is needed at the
2378 opposite thread. Otherwise, compute what is needed there. */
2379 if (condition == const_true_rtx)
2380 CLEAR_RESOURCE (&opposite_needed);
2381 else
2382 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2384 /* If the insn at THREAD can be split, do it here to avoid having to
2385 update THREAD and NEW_THREAD if it is done in the loop below. Also
2386 initialize NEW_THREAD. */
2388 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2390 /* Scan insns at THREAD. We are looking for an insn that can be removed
2391 from THREAD (it neither sets nor references resources that were set
2392 ahead of it and it doesn't set anything needed by the insns ahead of
2393 it) and that either can be placed in an annulling insn or isn't
2394 needed at OPPOSITE_THREAD.
2396 CLEAR_RESOURCE (&needed);
2397 CLEAR_RESOURCE (&set);
2399 /* If we do not own this thread, we must stop as soon as we find
2400 something that we can't put in a delay slot, since all we can do
2401 is branch into THREAD at a later point. Therefore, labels stop
2402 the search if this is not the `true' thread. */
2404 for (trial = thread;
2405 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2406 trial = next_nonnote_insn (trial))
2408 rtx pat, old_trial;
2410 /* If we have passed a label, we no longer own this thread. */
2411 if (LABEL_P (trial))
2413 own_thread = 0;
2414 continue;
2417 pat = PATTERN (trial);
2418 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2419 continue;
2421 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2422 don't separate or copy insns that set and use CC0. */
2423 if (! insn_references_resource_p (trial, &set, true)
2424 && ! insn_sets_resource_p (trial, &set, true)
2425 && ! insn_sets_resource_p (trial, &needed, true)
2426 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2427 && (! own_thread || ! sets_cc0_p (pat)))))
2428 && ! can_throw_internal (trial))
2430 rtx prior_insn;
2432 /* If TRIAL is redundant with some insn before INSN, we don't
2433 actually need to add it to the delay list; we can merely pretend
2434 we did. */
2435 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2437 fix_reg_dead_note (prior_insn, insn);
2438 if (own_thread)
2440 update_block (trial, thread);
2441 if (trial == thread)
2443 thread = next_active_insn (thread);
2444 if (new_thread == trial)
2445 new_thread = thread;
2448 delete_related_insns (trial);
2450 else
2452 update_reg_unused_notes (prior_insn, trial);
2453 new_thread = next_active_insn (trial);
2456 continue;
2459 /* There are two ways we can win: If TRIAL doesn't set anything
2460 needed at the opposite thread and can't trap, or if it can
2461 go into an annulled delay slot. But we want neither to copy
2462 nor to speculate frame-related insns. */
2463 if (!must_annul
2464 && ((condition == const_true_rtx
2465 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2466 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2467 && ! may_trap_or_fault_p (pat)
2468 && ! RTX_FRAME_RELATED_P (trial))))
2470 old_trial = trial;
2471 trial = try_split (pat, trial, 0);
2472 if (new_thread == old_trial)
2473 new_thread = trial;
2474 if (thread == old_trial)
2475 thread = trial;
2476 pat = PATTERN (trial);
2477 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2478 goto winner;
2480 else if (0
2481 || (ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2482 || (ANNUL_IFFALSE_SLOTS && thread_if_true))
2484 old_trial = trial;
2485 trial = try_split (pat, trial, 0);
2486 if (new_thread == old_trial)
2487 new_thread = trial;
2488 if (thread == old_trial)
2489 thread = trial;
2490 pat = PATTERN (trial);
2491 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2492 ? check_annul_list_true_false (0, *delay_list)
2493 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2494 : check_annul_list_true_false (1, *delay_list)
2495 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2497 rtx_insn *temp;
2499 must_annul = 1;
2500 winner:
2502 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2503 link_cc0_insns (trial);
2505 /* If we own this thread, delete the insn. If this is the
2506 destination of a branch, show that the basic block's status
2507 may have been updated. In any case, mark the new
2508 starting point of this thread. */
2509 if (own_thread)
2511 rtx note;
2513 update_block (trial, thread);
2514 if (trial == thread)
2516 thread = next_active_insn (thread);
2517 if (new_thread == trial)
2518 new_thread = thread;
2521 /* We are moving this insn, not deleting it. We must
2522 temporarily increment the use count on any referenced
2523 label lest it be deleted by delete_related_insns. */
2524 for (note = REG_NOTES (trial);
2525 note != NULL_RTX;
2526 note = XEXP (note, 1))
2527 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2528 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2530 /* REG_LABEL_OPERAND could be
2531 NOTE_INSN_DELETED_LABEL too. */
2532 if (LABEL_P (XEXP (note, 0)))
2533 LABEL_NUSES (XEXP (note, 0))++;
2534 else
2535 gcc_assert (REG_NOTE_KIND (note)
2536 == REG_LABEL_OPERAND);
2538 if (jump_to_label_p (trial))
2539 LABEL_NUSES (JUMP_LABEL (trial))++;
2541 delete_related_insns (trial);
2543 for (note = REG_NOTES (trial);
2544 note != NULL_RTX;
2545 note = XEXP (note, 1))
2546 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2547 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2549 /* REG_LABEL_OPERAND could be
2550 NOTE_INSN_DELETED_LABEL too. */
2551 if (LABEL_P (XEXP (note, 0)))
2552 LABEL_NUSES (XEXP (note, 0))--;
2553 else
2554 gcc_assert (REG_NOTE_KIND (note)
2555 == REG_LABEL_OPERAND);
2557 if (jump_to_label_p (trial))
2558 LABEL_NUSES (JUMP_LABEL (trial))--;
2560 else
2561 new_thread = next_active_insn (trial);
2563 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2564 if (thread_if_true)
2565 INSN_FROM_TARGET_P (temp) = 1;
2567 add_to_delay_list (temp, delay_list);
2569 if (slots_to_fill == ++(*pslots_filled))
2571 /* Even though we have filled all the slots, we
2572 may be branching to a location that has a
2573 redundant insn. Skip any if so. */
2574 while (new_thread && ! own_thread
2575 && ! insn_sets_resource_p (new_thread, &set, true)
2576 && ! insn_sets_resource_p (new_thread, &needed,
2577 true)
2578 && ! insn_references_resource_p (new_thread,
2579 &set, true)
2580 && (prior_insn
2581 = redundant_insn (new_thread, insn,
2582 *delay_list)))
2584 /* We know we do not own the thread, so no need
2585 to call update_block and delete_insn. */
2586 fix_reg_dead_note (prior_insn, insn);
2587 update_reg_unused_notes (prior_insn, new_thread);
2588 new_thread = next_active_insn (new_thread);
2590 break;
2593 continue;
2598 /* This insn can't go into a delay slot. */
2599 lose = 1;
2600 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2601 mark_referenced_resources (trial, &needed, true);
2603 /* Ensure we don't put insns between the setting of cc and the comparison
2604 by moving a setting of cc into an earlier delay slot since these insns
2605 could clobber the condition code. */
2606 set.cc = 1;
2608 /* If this insn is a register-register copy and the next insn has
2609 a use of our destination, change it to use our source. That way,
2610 it will become a candidate for our delay slot the next time
2611 through this loop. This case occurs commonly in loops that
2612 scan a list.
2614 We could check for more complex cases than those tested below,
2615 but it doesn't seem worth it. It might also be a good idea to try
2616 to swap the two insns. That might do better.
2618 We can't do this if the next insn modifies our destination, because
2619 that would make the replacement into the insn invalid. We also can't
2620 do this if it modifies our source, because it might be an earlyclobber
2621 operand. This latter test also prevents updating the contents of
2622 a PRE_INC. We also can't do this if there's overlap of source and
2623 destination. Overlap may happen for larger-than-register-size modes. */
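/* Schematically (hypothetical registers):
     TRIAL:  r3 <- r4
     NEXT:   r5 <- r3 + 8    is changed to    r5 <- r4 + 8
   so NEXT no longer reads TRIAL's destination and TRIAL can become a
   delay-slot candidate on a later iteration of this loop.  */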
2625 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2626 && REG_P (SET_SRC (pat))
2627 && REG_P (SET_DEST (pat))
2628 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2630 rtx_insn *next = next_nonnote_insn (trial);
2632 if (next && NONJUMP_INSN_P (next)
2633 && GET_CODE (PATTERN (next)) != USE
2634 && ! reg_set_p (SET_DEST (pat), next)
2635 && ! reg_set_p (SET_SRC (pat), next)
2636 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2637 && ! modified_in_p (SET_DEST (pat), next))
2638 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2642 /* If we stopped on a branch insn that has delay slots, see if we can
2643 steal some of the insns in those slots. */
2644 if (trial && NONJUMP_INSN_P (trial)
2645 && GET_CODE (PATTERN (trial)) == SEQUENCE
2646 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2648 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2649 /* If this is the `true' thread, we will want to follow the jump,
2650 so we can only do this if we have taken everything up to here. */
2651 if (thread_if_true && trial == new_thread)
2653 steal_delay_list_from_target (insn, condition, sequence,
2654 delay_list, &set, &needed,
2655 &opposite_needed, slots_to_fill,
2656 pslots_filled, &must_annul,
2657 &new_thread);
2658 /* If we owned the thread and are told that it branched
2659 elsewhere, make sure we own the thread at the new location. */
2660 if (own_thread && trial != new_thread)
2661 own_thread = own_thread_p (new_thread, new_thread, 0);
2663 else if (! thread_if_true)
2664 steal_delay_list_from_fallthrough (insn, condition, sequence,
2665 delay_list, &set, &needed,
2666 &opposite_needed, slots_to_fill,
2667 pslots_filled, &must_annul);
2670 /* If we haven't found anything for this delay slot and it is very
2671 likely that the branch will be taken, see if the insn at our target
2672 increments or decrements a register with an increment that does not
2673 depend on the destination register. If so, try to place the opposite
2674 arithmetic insn after the jump insn and put the arithmetic insn in the
2675 delay slot. If we can't do this, return. */
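/* Schematically (hypothetical insn): if the target starts with
   "r2 <- r2 + 4", we emit "r2 <- r2 - 4" right after the branch, put
   "r2 <- r2 + 4" in the delay slot, and redirect the branch past the
   original insn; the fall-through path is compensated and the taken path
   sees the same value of r2 as before.  */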
2676 if (delay_list->is_empty () && likely
2677 && new_thread && !ANY_RETURN_P (new_thread)
2678 && NONJUMP_INSN_P (new_thread)
2679 && !RTX_FRAME_RELATED_P (new_thread)
2680 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2681 && asm_noperands (PATTERN (new_thread)) < 0)
2683 rtx pat = PATTERN (new_thread);
2684 rtx dest;
2685 rtx src;
2687 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2688 above. */
2689 trial = as_a <rtx_insn *> (new_thread);
2690 pat = PATTERN (trial);
2692 if (!NONJUMP_INSN_P (trial)
2693 || GET_CODE (pat) != SET
2694 || ! eligible_for_delay (insn, 0, trial, flags)
2695 || can_throw_internal (trial))
2696 return;
2698 dest = SET_DEST (pat), src = SET_SRC (pat);
2699 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2700 && rtx_equal_p (XEXP (src, 0), dest)
2701 && (!FLOAT_MODE_P (GET_MODE (src))
2702 || flag_unsafe_math_optimizations)
2703 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2704 && ! side_effects_p (pat))
2706 rtx other = XEXP (src, 1);
2707 rtx new_arith;
2708 rtx_insn *ninsn;
2710 /* If this is a constant adjustment, use the same code with
2711 the negated constant. Otherwise, reverse the sense of the
2712 arithmetic. */
2713 if (CONST_INT_P (other))
2714 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2715 negate_rtx (GET_MODE (src), other));
2716 else
2717 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2718 GET_MODE (src), dest, other);
2720 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2722 if (recog_memoized (ninsn) < 0
2723 || (extract_insn (ninsn),
2724 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2726 delete_related_insns (ninsn);
2727 return;
2730 if (own_thread)
2732 update_block (trial, thread);
2733 if (trial == thread)
2735 thread = next_active_insn (thread);
2736 if (new_thread == trial)
2737 new_thread = thread;
2739 delete_related_insns (trial);
2741 else
2742 new_thread = next_active_insn (trial);
2744 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2745 if (thread_if_true)
2746 INSN_FROM_TARGET_P (ninsn) = 1;
2748 add_to_delay_list (ninsn, delay_list);
2749 (*pslots_filled)++;
2753 if (!delay_list->is_empty () && must_annul)
2754 INSN_ANNULLED_BRANCH_P (insn) = 1;
2756 /* If we are to branch into the middle of this thread, find an appropriate
2757 label or make a new one if none, and redirect INSN to it. If we hit the
2758 end of the function, use the end-of-function label. */
2759 if (new_thread != thread)
2761 rtx label;
2762 bool crossing = false;
2764 gcc_assert (thread_if_true);
2766 if (new_thread && simplejump_or_return_p (new_thread)
2767 && redirect_with_delay_list_safe_p (insn,
2768 JUMP_LABEL (new_thread),
2769 *delay_list))
2770 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2771 &crossing);
2773 if (ANY_RETURN_P (new_thread))
2774 label = find_end_label (new_thread);
2775 else if (LABEL_P (new_thread))
2776 label = new_thread;
2777 else
2778 label = get_label_before (as_a <rtx_insn *> (new_thread),
2779 JUMP_LABEL (insn));
2781 if (label)
2783 reorg_redirect_jump (insn, label);
2784 if (crossing)
2785 CROSSING_JUMP_P (insn) = 1;
2790 /* Make another attempt to find insns to place in delay slots.
2792 We previously looked for insns located in front of the delay insn
2793 and, for non-jump delay insns, located behind the delay insn.
2795 Here we only try to schedule jump insns, trying to move insns from either
2796 the target or the following insns into the delay slot. If annulling is
2797 supported, we are likely to do this. Otherwise, we can do this only
2798 if safe. */
2800 static void
2801 fill_eager_delay_slots (void)
2803 rtx_insn *insn;
2804 int i;
2805 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2807 for (i = 0; i < num_unfilled_slots; i++)
2809 rtx condition;
2810 rtx target_label, insn_at_target;
2811 rtx_insn *fallthrough_insn;
2812 auto_vec<rtx_insn *, 5> delay_list;
2813 rtx_jump_insn *jump_insn;
2814 int own_target;
2815 int own_fallthrough;
2816 int prediction, slots_to_fill, slots_filled;
2818 insn = unfilled_slots_base[i];
2819 if (insn == 0
2820 || insn->deleted ()
2821 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2822 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2823 continue;
2825 slots_to_fill = num_delay_slots (jump_insn);
2826 /* Some machine descriptions have defined instructions to have
2827 delay slots only in certain circumstances which may depend on
2828 nearby insns (which change due to reorg's actions).
2830 For example, the PA port normally has delay slots for unconditional
2831 jumps.
2833 However, the PA port claims such jumps do not have a delay slot
2834 if they are immediate successors of certain CALL_INSNs. This
2835 allows the port to favor filling the delay slot of the call with
2836 the unconditional jump. */
2837 if (slots_to_fill == 0)
2838 continue;
2840 slots_filled = 0;
2841 target_label = JUMP_LABEL (jump_insn);
2842 condition = get_branch_condition (jump_insn, target_label);
2844 if (condition == 0)
2845 continue;
2847 /* Get the next active fallthrough and target insns and see if we own
2848 them. Then see whether the branch is likely true. We don't need
2849 to do a lot of this for unconditional branches. */
2851 insn_at_target = first_active_target_insn (target_label);
2852 own_target = own_thread_p (target_label, target_label, 0);
2854 if (condition == const_true_rtx)
2856 own_fallthrough = 0;
2857 fallthrough_insn = 0;
2858 prediction = 2;
2860 else
2862 fallthrough_insn = next_active_insn (jump_insn);
2863 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2864 prediction = mostly_true_jump (jump_insn);
2867 /* If this insn is expected to branch, first try to get insns from our
2868 target, then our fallthrough insns. If it is not expected to branch,
2869 try the other order. */
2871 if (prediction > 0)
2873 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2874 fallthrough_insn, prediction == 2, 1,
2875 own_target,
2876 slots_to_fill, &slots_filled, &delay_list);
2878 if (delay_list.is_empty () && own_fallthrough)
2880 /* Even though we didn't find anything for delay slots,
2881 we might have found a redundant insn which we deleted
2882 from the thread that was filled. So we have to recompute
2883 the next insn at the target. */
2884 target_label = JUMP_LABEL (jump_insn);
2885 insn_at_target = first_active_target_insn (target_label);
2887 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2888 insn_at_target, 0, 0, own_fallthrough,
2889 slots_to_fill, &slots_filled,
2890 &delay_list);
2893 else
2895 if (own_fallthrough)
2896 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2897 insn_at_target, 0, 0, own_fallthrough,
2898 slots_to_fill, &slots_filled, &delay_list);
2900 if (delay_list.is_empty ())
2901 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2902 next_active_insn (insn), 0, 1, own_target,
2903 slots_to_fill, &slots_filled, &delay_list);
2906 if (!delay_list.is_empty ())
2907 unfilled_slots_base[i]
2908 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2910 if (slots_to_fill == slots_filled)
2911 unfilled_slots_base[i] = 0;
2913 note_delay_statistics (slots_filled, 1);
2917 static void delete_computation (rtx insn);
2919 /* Recursively delete prior insns that compute the value (used only by INSN
2920 which the caller is deleting) stored in the register mentioned by NOTE
2921 which is a REG_DEAD note associated with INSN. */
2923 static void
2924 delete_prior_computation (rtx note, rtx insn)
2926 rtx our_prev;
2927 rtx reg = XEXP (note, 0);
2929 for (our_prev = prev_nonnote_insn (insn);
2930 our_prev && (NONJUMP_INSN_P (our_prev)
2931 || CALL_P (our_prev));
2932 our_prev = prev_nonnote_insn (our_prev))
2934 rtx pat = PATTERN (our_prev);
2936 /* If we reach a CALL which is not calling a const function
2937 or the callee pops the arguments, then give up. */
2938 if (CALL_P (our_prev)
2939 && (! RTL_CONST_CALL_P (our_prev)
2940 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2941 break;
2943 /* If we reach a SEQUENCE, it is too complex to try to
2944 do anything with it, so give up. We can be run during
2945 and after reorg, so SEQUENCE rtl can legitimately show
2946 up here. */
2947 if (GET_CODE (pat) == SEQUENCE)
2948 break;
2950 if (GET_CODE (pat) == USE
2951 && NONJUMP_INSN_P (XEXP (pat, 0)))
2952 /* reorg creates USEs that look like this. We leave them
2953 alone because reorg needs them for its own purposes. */
2954 break;
2956 if (reg_set_p (reg, pat))
2958 if (side_effects_p (pat) && !CALL_P (our_prev))
2959 break;
2961 if (GET_CODE (pat) == PARALLEL)
2963 /* If we find a SET of something else, we can't
2964 delete the insn. */
2966 int i;
2968 for (i = 0; i < XVECLEN (pat, 0); i++)
2970 rtx part = XVECEXP (pat, 0, i);
2972 if (GET_CODE (part) == SET
2973 && SET_DEST (part) != reg)
2974 break;
2977 if (i == XVECLEN (pat, 0))
2978 delete_computation (our_prev);
2980 else if (GET_CODE (pat) == SET
2981 && REG_P (SET_DEST (pat)))
2983 int dest_regno = REGNO (SET_DEST (pat));
2984 int dest_endregno = END_REGNO (SET_DEST (pat));
2985 int regno = REGNO (reg);
2986 int endregno = END_REGNO (reg);
2988 if (dest_regno >= regno
2989 && dest_endregno <= endregno)
2990 delete_computation (our_prev);
2992 /* We may have a multi-word hard register and some, but not
2993 all, of the words of the register are needed in subsequent
2994 insns. Write REG_UNUSED notes for those parts that were not
2995 needed. */
2996 else if (dest_regno <= regno
2997 && dest_endregno >= endregno)
2999 int i;
3001 add_reg_note (our_prev, REG_UNUSED, reg);
3003 for (i = dest_regno; i < dest_endregno; i++)
3004 if (! find_regno_note (our_prev, REG_UNUSED, i))
3005 break;
3007 if (i == dest_endregno)
3008 delete_computation (our_prev);
3012 break;
3015 /* If PAT references the register that dies here, it is an
3016 additional use. Hence any prior SET isn't dead. However, this
3017 insn becomes the new place for the REG_DEAD note. */
3018 if (reg_overlap_mentioned_p (reg, pat))
3020 XEXP (note, 1) = REG_NOTES (our_prev);
3021 REG_NOTES (our_prev) = note;
3022 break;
3027 /* Delete INSN and recursively delete insns that compute values used only
3028 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3030 Look at all our REG_DEAD notes. If a previous insn does nothing other
3031 than set a register that dies in this insn, we can delete that insn
3032 as well.
3034 On machines with CC0, if CC0 is used in this insn, we may be able to
3035 delete the insn that set it. */
3037 static void
3038 delete_computation (rtx insn)
3040 rtx note, next;
3042 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3044 rtx_insn *prev = prev_nonnote_insn (insn);
3045 /* We assume that at this stage
3046 CC's are always set explicitly
3047 and always immediately before the jump that
3048 will use them. So if the previous insn
3049 exists to set the CC's, delete it
3050 (unless it performs auto-increments, etc.). */
3051 if (prev && NONJUMP_INSN_P (prev)
3052 && sets_cc0_p (PATTERN (prev)))
3054 if (sets_cc0_p (PATTERN (prev)) > 0
3055 && ! side_effects_p (PATTERN (prev)))
3056 delete_computation (prev);
3057 else
3058 /* Otherwise, show that cc0 won't be used. */
3059 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3063 for (note = REG_NOTES (insn); note; note = next)
3065 next = XEXP (note, 1);
3067 if (REG_NOTE_KIND (note) != REG_DEAD
3068 /* Verify that the REG_NOTE is legitimate. */
3069 || !REG_P (XEXP (note, 0)))
3070 continue;
3072 delete_prior_computation (note, insn);
3075 delete_related_insns (insn);
3078 /* If all INSN does is set the pc, delete it,
3079 and delete the insn that set the condition codes for it
3080 if that's what the previous thing was. */
3082 static void
3083 delete_jump (rtx_insn *insn)
3085 rtx set = single_set (insn);
3087 if (set && GET_CODE (SET_DEST (set)) == PC)
3088 delete_computation (insn);
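/* Scan backwards from the first active insn after X, stopping at
   SCAN_LIMIT.  Return the first label encountered, or NULL if SCAN_LIMIT
   (or the start of the insn chain) is reached first.  */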
3091 static rtx_insn *
3092 label_before_next_insn (rtx x, rtx scan_limit)
3094 rtx_insn *insn = next_active_insn (x);
3095 while (insn)
3097 insn = PREV_INSN (insn);
3098 if (insn == scan_limit || insn == NULL_RTX)
3099 return NULL;
3100 if (LABEL_P (insn))
3101 break;
3103 return insn;
3106 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3107 BEG and END. */
3109 static bool
3110 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3112 const rtx_insn *p;
3113 for (p = beg; p != end; p = NEXT_INSN (p))
3114 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3115 return true;
3116 return false;
3120 /* Once we have tried two ways to fill a delay slot, make a pass over the
3121 code to try to improve the results and to do such things as more jump
3122 threading. */
3124 static void
3125 relax_delay_slots (rtx_insn *first)
3127 rtx_insn *insn, *next;
3128 rtx_sequence *pat;
3129 rtx trial;
3130 rtx_insn *delay_insn;
3131 rtx target_label;
3133 /* Look at every JUMP_INSN and see if we can improve it. */
3134 for (insn = first; insn; insn = next)
3136 rtx_insn *other;
3137 bool crossing;
3139 next = next_active_insn (insn);
3141 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3142 the next insn, or jumps to a label that is not the last of a
3143 group of consecutive labels. */
3144 if (is_a <rtx_jump_insn *> (insn)
3145 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3146 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3148 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3149 target_label
3150 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3151 &crossing));
3152 if (ANY_RETURN_P (target_label))
3153 target_label = find_end_label (target_label);
3155 if (target_label && next_active_insn (target_label) == next
3156 && ! condjump_in_parallel_p (jump_insn)
3157 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3159 delete_jump (jump_insn);
3160 continue;
3163 if (target_label && target_label != JUMP_LABEL (jump_insn))
3165 reorg_redirect_jump (jump_insn, target_label);
3166 if (crossing)
3167 CROSSING_JUMP_P (jump_insn) = 1;
3170 /* See if this jump conditionally branches around an unconditional
3171 jump. If so, invert this jump and point it to the target of the
3172 second jump. Check if it's possible on the target. */
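/* Schematically (illustrative opcodes and labels):
       beq L1 ; b L2 ; L1: ...    becomes    bne L2 ; L1: ...
   i.e. the conditional jump is inverted and retargeted at L2, after which
   the unconditional jump is deleted.  */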
3173 if (next && simplejump_or_return_p (next)
3174 && any_condjump_p (jump_insn)
3175 && target_label
3176 && next_active_insn (target_label) == next_active_insn (next)
3177 && no_labels_between_p (jump_insn, next)
3178 && targetm.can_follow_jump (jump_insn, next))
3180 rtx label = JUMP_LABEL (next);
3182 /* Be careful how we do this to avoid deleting code or
3183 labels that are momentarily dead. See similar optimization
3184 in jump.c.
3186 We also need to ensure we properly handle the case when
3187 invert_jump fails. */
3189 ++LABEL_NUSES (target_label);
3190 if (!ANY_RETURN_P (label))
3191 ++LABEL_NUSES (label);
3193 if (invert_jump (jump_insn, label, 1))
3195 delete_related_insns (next);
3196 next = jump_insn;
3199 if (!ANY_RETURN_P (label))
3200 --LABEL_NUSES (label);
3202 if (--LABEL_NUSES (target_label) == 0)
3203 delete_related_insns (target_label);
3205 continue;
3209 /* If this is an unconditional jump and the previous insn is a
3210 conditional jump, try reversing the condition of the previous
3211 insn and swapping our targets. The next pass might be able to
3212 fill the slots.
3214 Don't do this if we expect the conditional branch to be true, because
3215 we would then be making the more common case longer. */
3217 if (simplejump_or_return_p (insn)
3218 && (other = prev_active_insn (insn)) != 0
3219 && any_condjump_p (other)
3220 && no_labels_between_p (other, insn)
3221 && 0 > mostly_true_jump (other))
3223 rtx other_target = JUMP_LABEL (other);
3224 target_label = JUMP_LABEL (insn);
3226 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3227 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3230 /* Now look only at cases where we have a filled delay slot. */
3231 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3232 continue;
3234 pat = as_a <rtx_sequence *> (PATTERN (insn));
3235 delay_insn = pat->insn (0);
3237 /* See if the first insn in the delay slot is redundant with some
3238 previous insn. Remove it from the delay slot if so; then set up
3239 to reprocess this insn. */
3240 if (redundant_insn (pat->insn (1), delay_insn, vNULL))
3242 update_block (pat->insn (1), insn);
3243 delete_from_delay_slot (pat->insn (1));
3244 next = prev_active_insn (next);
3245 continue;
3248 /* See if we have a RETURN insn with a filled delay slot followed
3249 by a RETURN insn with an unfilled delay slot. If so, we can delete
3250 the first RETURN (but not its delay insn). This gives the same
3251 effect in fewer instructions.
3253 Only do so if optimizing for size since this results in slower, but
3254 smaller code. */
3255 if (optimize_function_for_size_p (cfun)
3256 && ANY_RETURN_P (PATTERN (delay_insn))
3257 && next
3258 && JUMP_P (next)
3259 && PATTERN (next) == PATTERN (delay_insn))
3261 rtx_insn *after;
3262 int i;
3264 /* Delete the RETURN and just execute the delay list insns.
3266 We do this by deleting the INSN containing the SEQUENCE, then
3267 re-emitting the insns separately, and then deleting the RETURN.
3268 This allows the count of the jump target to be properly
3269 decremented.
3271 Note that we need to change the INSN_UID of the re-emitted insns
3272 since it is used to hash the insns for mark_target_live_regs and
3273 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3275 Clear the from target bit, since these insns are no longer
3276 in delay slots. */
3277 for (i = 0; i < XVECLEN (pat, 0); i++)
3278 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3280 trial = PREV_INSN (insn);
3281 delete_related_insns (insn);
3282 gcc_assert (GET_CODE (pat) == SEQUENCE);
3283 add_insn_after (delay_insn, trial, NULL);
3284 after = delay_insn;
3285 for (i = 1; i < pat->len (); i++)
3286 after = emit_copy_of_insn_after (pat->insn (i), after);
3287 delete_scheduled_jump (delay_insn);
3288 continue;
3291 /* Now look only at the cases where we have a filled JUMP_INSN. */
3292 rtx_jump_insn *delay_jump_insn =
3293 dyn_cast <rtx_jump_insn *> (delay_insn);
3294 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3295 || condjump_in_parallel_p (delay_jump_insn)))
3296 continue;
3298 target_label = JUMP_LABEL (delay_jump_insn);
3299 if (target_label && ANY_RETURN_P (target_label))
3300 continue;
3302 /* If this jump goes to another unconditional jump, thread it, but
3303 don't convert a jump into a RETURN here. */
3304 trial = skip_consecutive_labels (follow_jumps (target_label,
3305 delay_jump_insn,
3306 &crossing));
3307 if (ANY_RETURN_P (trial))
3308 trial = find_end_label (trial);
3310 if (trial && trial != target_label
3311 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3313 reorg_redirect_jump (delay_jump_insn, trial);
3314 target_label = trial;
3315 if (crossing)
3316 CROSSING_JUMP_P (insn) = 1;
3319 /* If the first insn at TARGET_LABEL is redundant with a previous
3320 insn, redirect the jump to the following insn and process again.
3321 We use next_real_insn instead of next_active_insn so we
3322 don't skip USE-markers, or we'll end up with incorrect
3323 liveness info. */
3324 trial = next_real_insn (target_label);
3325 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3326 && redundant_insn (trial, insn, vNULL)
3327 && ! can_throw_internal (trial))
3329 /* Figure out where to emit the special USE insn so we don't
3330 later incorrectly compute register live/death info. */
3331 rtx_insn *tmp = next_active_insn (trial);
3332 if (tmp == 0)
3333 tmp = find_end_label (simple_return_rtx);
3335 if (tmp)
3337 /* Insert the special USE insn and update dataflow info.
3338 We know "trial" is an insn here as it is the output of
3339 next_real_insn () above. */
3340 update_block (as_a <rtx_insn *> (trial), tmp);
3342 /* Now emit a label before the special USE insn, and
3343 redirect our jump to the new label. */
3344 target_label = get_label_before (PREV_INSN (tmp), target_label);
3345 reorg_redirect_jump (delay_jump_insn, target_label);
3346 next = insn;
3347 continue;
3351 /* Similarly, if it is an unconditional jump with one insn in its
3352 delay list and that insn is redundant, thread the jump. */
3353 rtx_sequence *trial_seq =
3354 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3355 if (trial_seq
3356 && trial_seq->len () == 2
3357 && JUMP_P (trial_seq->insn (0))
3358 && simplejump_or_return_p (trial_seq->insn (0))
3359 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3361 target_label = JUMP_LABEL (trial_seq->insn (0));
3362 if (ANY_RETURN_P (target_label))
3363 target_label = find_end_label (target_label);
3365 if (target_label
3366 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3367 target_label, insn))
3369 update_block (trial_seq->insn (1), insn);
3370 reorg_redirect_jump (delay_jump_insn, target_label);
3371 next = insn;
3372 continue;
3376 /* See if we have a simple (conditional) jump that is useless. */
3377 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3378 && ! condjump_in_parallel_p (delay_jump_insn)
3379 && prev_active_insn (target_label) == insn
3380 && ! BARRIER_P (prev_nonnote_insn (target_label))
3381 /* If the last insn in the delay slot sets CC0 for some insn,
3382 various code assumes that it is in a delay slot. We could
3383 put it back where it belonged and delete the register notes,
3384 but it doesn't seem worthwhile in this uncommon case. */
3385 && (!HAVE_cc0
3386 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3387 REG_CC_USER, NULL_RTX)))
3389 rtx_insn *after;
3390 int i;
3392 /* All this insn does is execute its delay list and jump to the
3393 following insn. So delete the jump and just execute the delay
3394 list insns.
3396 We do this by deleting the INSN containing the SEQUENCE, then
3397 re-emitting the insns separately, and then deleting the jump.
3398 This allows the count of the jump target to be properly
3399 decremented.
3401 Note that we need to change the INSN_UID of the re-emitted insns
3402 since it is used to hash the insns for mark_target_live_regs and
3403 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3405 Clear the from target bit, since these insns are no longer
3406 in delay slots. */
3407 for (i = 0; i < XVECLEN (pat, 0); i++)
3408 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3410 trial = PREV_INSN (insn);
3411 delete_related_insns (insn);
3412 gcc_assert (GET_CODE (pat) == SEQUENCE);
3413 add_insn_after (delay_jump_insn, trial, NULL);
3414 after = delay_jump_insn;
3415 for (i = 1; i < pat->len (); i++)
3416 after = emit_copy_of_insn_after (pat->insn (i), after);
3417 delete_scheduled_jump (delay_jump_insn);
3418 continue;
3421 /* See if this is an unconditional jump around a single insn which is
3422 identical to the one in its delay slot. In this case, we can just
3423 delete the branch and the insn in its delay slot. */
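/* Added illustrative commentary (schematic only):

       b    L   (delay: insn A)
       insn A                      =>      insn A
    L: ...                              L: ...

   Deleting the jump together with its slot leaves the identical insn A on
   the fall-through path, so behavior is unchanged.  */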
3424 if (next && NONJUMP_INSN_P (next)
3425 && label_before_next_insn (next, insn) == target_label
3426 && simplejump_p (insn)
3427 && XVECLEN (pat, 0) == 2
3428 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3430 delete_related_insns (insn);
3431 continue;
3434 /* See if this jump (with its delay slots) conditionally branches
3435 around an unconditional jump (without delay slots). If so, invert
3436 this jump and point it to the target of the second jump. We cannot
3437 do this for annulled jumps, though. Again, don't convert a jump to
3438 a RETURN here. */
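/* Added illustrative commentary (schematic only, in no particular syntax):

       beq  cc -> L1  (delay: D)           bne  cc -> L2  (delay: D)
       b    L2                     =>   L1: ...
    L1: ...

   i.e. the conditional jump is inverted and retargeted at L2; the
   INSN_FROM_TARGET_P bits of its delay insns are flipped and the
   unconditional jump is deleted.  */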
3439 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3440 && any_condjump_p (delay_jump_insn)
3441 && next && simplejump_or_return_p (next)
3442 && next_active_insn (target_label) == next_active_insn (next)
3443 && no_labels_between_p (insn, next))
3445 rtx label = JUMP_LABEL (next);
3446 rtx old_label = JUMP_LABEL (delay_jump_insn);
3448 if (ANY_RETURN_P (label))
3449 label = find_end_label (label);
3451 /* find_end_label can generate a new label. Check this first. */
3452 if (label
3453 && no_labels_between_p (insn, next)
3454 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3455 label, insn))
3457 /* Be careful how we do this to avoid deleting code or labels
3458 that are momentarily dead. See the similar optimization in
3459 jump.c. */
3460 if (old_label)
3461 ++LABEL_NUSES (old_label);
3463 if (invert_jump (delay_jump_insn, label, 1))
3465 int i;
3467 /* Must update the INSN_FROM_TARGET_P bits now that
3468 the branch is reversed, so that mark_target_live_regs
3469 will handle the delay slot insn correctly. */
3470 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3472 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3473 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3476 delete_related_insns (next);
3477 next = insn;
3480 if (old_label && --LABEL_NUSES (old_label) == 0)
3481 delete_related_insns (old_label);
3482 continue;
3486 /* If we own the thread opposite the way this insn branches, see if we
3487 can merge its delay slots with following insns. */
3488 if (INSN_FROM_TARGET_P (pat->insn (1))
3489 && own_thread_p (NEXT_INSN (insn), 0, 1))
3490 try_merge_delay_insns (insn, next);
3491 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3492 && own_thread_p (target_label, target_label, 0))
3493 try_merge_delay_insns (insn, next_active_insn (target_label));
3495 /* If we get here, we haven't deleted INSN. But we may have deleted
3496 NEXT, so recompute it. */
3497 next = next_active_insn (insn);
3502 /* Look for filled jumps to the end-of-function label. We can try to convert
3503 them into RETURN insns if the insns in the delay slot are valid for the
3504 RETURN as well. */
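/* Added illustrative commentary (schematic only): on a target with a return
   pattern, a filled jump to the label that precedes the function's RETURN,
   e.g.

       b    .Lend   (delay: restore insn)
       ...
    .Lend:
       return

   can itself become "return (delay: restore insn)" when its delay-slot insns
   are also valid in a RETURN's slots; otherwise it is redirected to the best
   available RETURN and left as a branch.  */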
3506 static void
3507 make_return_insns (rtx_insn *first)
3509 rtx_insn *insn;
3510 rtx_jump_insn *jump_insn;
3511 rtx real_return_label = function_return_label;
3512 rtx real_simple_return_label = function_simple_return_label;
3513 int slots, i;
3515 /* See if there is a RETURN insn in the function other than the one we
3516 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3517 into a RETURN to jump to it. */
3518 for (insn = first; insn; insn = NEXT_INSN (insn))
3519 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3521 rtx t = get_label_before (insn, NULL_RTX);
3522 if (PATTERN (insn) == ret_rtx)
3523 real_return_label = t;
3524 else
3525 real_simple_return_label = t;
3526 break;
3529 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3530 was equal to END_OF_FUNCTION_LABEL. */
3531 if (real_return_label)
3532 LABEL_NUSES (real_return_label)++;
3533 if (real_simple_return_label)
3534 LABEL_NUSES (real_simple_return_label)++;
3536 /* Clear the list of insns to fill so we can use it. */
3537 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3539 for (insn = first; insn; insn = NEXT_INSN (insn))
3541 int flags;
3542 rtx kind, real_label;
3544 /* Only look at filled JUMP_INSNs that go to the end-of-function
3545 label. */
3546 if (!NONJUMP_INSN_P (insn))
3547 continue;
3549 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3550 continue;
3552 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3554 if (!jump_to_label_p (pat->insn (0)))
3555 continue;
3557 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3559 kind = ret_rtx;
3560 real_label = real_return_label;
3562 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3564 kind = simple_return_rtx;
3565 real_label = real_simple_return_label;
3567 else
3568 continue;
3570 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3572 /* If we can't make the jump into a RETURN, try to redirect it to the best
3573 RETURN and go on to the next insn. */
3574 if (!reorg_redirect_jump (jump_insn, kind))
3576 /* Make sure redirecting the jump will not invalidate the delay
3577 slot insns. */
3578 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3579 reorg_redirect_jump (jump_insn, real_label);
3580 continue;
3583 /* See if this RETURN can accept the insns currently in its delay slot.
3584 It can if it has at least as many slots and the contents of each
3585 are valid. */
3587 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3588 slots = num_delay_slots (jump_insn);
3589 if (slots >= XVECLEN (pat, 0) - 1)
3591 for (i = 1; i < XVECLEN (pat, 0); i++)
3592 if (! (
3593 #if ANNUL_IFFALSE_SLOTS
3594 (INSN_ANNULLED_BRANCH_P (jump_insn)
3595 && INSN_FROM_TARGET_P (pat->insn (i)))
3596 ? eligible_for_annul_false (jump_insn, i - 1,
3597 pat->insn (i), flags) :
3598 #endif
3599 #if ANNUL_IFTRUE_SLOTS
3600 (INSN_ANNULLED_BRANCH_P (jump_insn)
3601 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3602 ? eligible_for_annul_true (jump_insn, i - 1,
3603 pat->insn (i), flags) :
3604 #endif
3605 eligible_for_delay (jump_insn, i - 1,
3606 pat->insn (i), flags)))
3607 break;
3609 else
3610 i = 0;
3612 if (i == XVECLEN (pat, 0))
3613 continue;
3615 /* We have to do something with this insn. If it is an unconditional
3616 RETURN, delete the SEQUENCE and output the individual insns,
3617 followed by the RETURN. Then set things up so we try to find
3618 insns for its delay slots, if it needs some. */
3619 if (ANY_RETURN_P (PATTERN (jump_insn)))
3621 rtx_insn *prev = PREV_INSN (insn);
3623 delete_related_insns (insn);
3624 for (i = 1; i < XVECLEN (pat, 0); i++)
3625 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3627 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3628 emit_barrier_after (insn);
3630 if (slots)
3631 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3633 else
3634 /* It is probably more efficient to keep this with its current
3635 delay slot as a branch to a RETURN. */
3636 reorg_redirect_jump (jump_insn, real_label);
3639 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3640 new delay slots we have created. */
3641 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3642 delete_related_insns (real_return_label);
3643 if (real_simple_return_label != NULL_RTX
3644 && --LABEL_NUSES (real_simple_return_label) == 0)
3645 delete_related_insns (real_simple_return_label);
3647 fill_simple_delay_slots (1);
3648 fill_simple_delay_slots (0);
3651 /* Try to find insns to place in delay slots. */
3653 static void
3654 dbr_schedule (rtx_insn *first)
3656 rtx_insn *insn, *next, *epilogue_insn = 0;
3657 int i;
3658 bool need_return_insns;
3660 /* If the current function has no insns other than the prologue and
3661 epilogue, then do not try to fill any delay slots. */
3662 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3663 return;
3665 /* Find the highest INSN_UID and allocate and initialize our map from
3666 INSN_UID's to position in code. */
3667 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3669 if (INSN_UID (insn) > max_uid)
3670 max_uid = INSN_UID (insn);
3671 if (NOTE_P (insn)
3672 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3673 epilogue_insn = insn;
3676 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3677 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3678 uid_to_ruid[INSN_UID (insn)] = i;
3680 /* Initialize the list of insns that need filling. */
3681 if (unfilled_firstobj == 0)
3683 gcc_obstack_init (&unfilled_slots_obstack);
3684 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3687 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3689 rtx target;
3691 /* Skip vector tables. We can't get attributes for them. */
3692 if (JUMP_TABLE_DATA_P (insn))
3693 continue;
3695 if (JUMP_P (insn))
3696 INSN_ANNULLED_BRANCH_P (insn) = 0;
3697 INSN_FROM_TARGET_P (insn) = 0;
3699 if (num_delay_slots (insn) > 0)
3700 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3702 /* Ensure all jumps go to the last of a set of consecutive labels. */
3703 if (JUMP_P (insn)
3704 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3705 && !ANY_RETURN_P (JUMP_LABEL (insn))
3706 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3707 != JUMP_LABEL (insn)))
3708 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3711 init_resource_info (epilogue_insn);
3713 /* Show we haven't computed an end-of-function label yet. */
3714 function_return_label = function_simple_return_label = NULL;
3716 /* Initialize the statistics for this function. */
3717 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3718 memset (num_filled_delays, 0, sizeof num_filled_delays);
3720 /* Now do the delay slot filling. Try everything twice in case earlier
3721 changes make more slots fillable. */
3723 for (reorg_pass_number = 0;
3724 reorg_pass_number < MAX_REORG_PASSES;
3725 reorg_pass_number++)
3727 fill_simple_delay_slots (1);
3728 fill_simple_delay_slots (0);
3729 if (!targetm.no_speculation_in_delay_slots_p ())
3730 fill_eager_delay_slots ();
3731 relax_delay_slots (first);
3734 /* If we made an end of function label, indicate that it is now
3735 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3736 If it is now unused, delete it. */
3737 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3738 delete_related_insns (function_return_label);
3739 if (function_simple_return_label
3740 && --LABEL_NUSES (function_simple_return_label) == 0)
3741 delete_related_insns (function_simple_return_label);
3743 need_return_insns = false;
3744 need_return_insns |= targetm.have_return () && function_return_label != 0;
3745 need_return_insns |= (targetm.have_simple_return ()
3746 && function_simple_return_label != 0);
3747 if (need_return_insns)
3748 make_return_insns (first);
3750 /* Delete any USE insns made by update_block; subsequent passes don't need
3751 them or know how to deal with them. */
3752 for (insn = first; insn; insn = next)
3754 next = NEXT_INSN (insn);
3756 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3757 && INSN_P (XEXP (PATTERN (insn), 0)))
3758 next = delete_related_insns (insn);
3761 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3763 /* It is not clear why the line below is needed, but it does seem to be. */
3764 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3766 if (dump_file)
3768 int i, j, need_comma;
3769 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3770 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3772 for (reorg_pass_number = 0;
3773 reorg_pass_number < MAX_REORG_PASSES;
3774 reorg_pass_number++)
3776 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3777 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3779 need_comma = 0;
3780 fprintf (dump_file, ";; Reorg function #%d\n", i);
3782 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3783 num_insns_needing_delays[i][reorg_pass_number]);
3785 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3786 if (num_filled_delays[i][j][reorg_pass_number])
3788 if (need_comma)
3789 fprintf (dump_file, ", ");
3790 need_comma = 1;
3791 fprintf (dump_file, "%d got %d delays",
3792 num_filled_delays[i][j][reorg_pass_number], j);
3794 fprintf (dump_file, "\n");
3797 memset (total_delay_slots, 0, sizeof total_delay_slots);
3798 memset (total_annul_slots, 0, sizeof total_annul_slots);
3799 for (insn = first; insn; insn = NEXT_INSN (insn))
3801 if (! insn->deleted ()
3802 && NONJUMP_INSN_P (insn)
3803 && GET_CODE (PATTERN (insn)) != USE
3804 && GET_CODE (PATTERN (insn)) != CLOBBER)
3806 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3808 rtx control;
3809 j = XVECLEN (PATTERN (insn), 0) - 1;
3810 if (j > MAX_DELAY_HISTOGRAM)
3811 j = MAX_DELAY_HISTOGRAM;
3812 control = XVECEXP (PATTERN (insn), 0, 0);
3813 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3814 total_annul_slots[j]++;
3815 else
3816 total_delay_slots[j]++;
3818 else if (num_delay_slots (insn) > 0)
3819 total_delay_slots[0]++;
3822 fprintf (dump_file, ";; Reorg totals: ");
3823 need_comma = 0;
3824 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3826 if (total_delay_slots[j])
3828 if (need_comma)
3829 fprintf (dump_file, ", ");
3830 need_comma = 1;
3831 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3834 fprintf (dump_file, "\n");
3836 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3838 fprintf (dump_file, ";; Reorg annuls: ");
3839 need_comma = 0;
3840 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3842 if (total_annul_slots[j])
3844 if (need_comma)
3845 fprintf (dump_file, ", ");
3846 need_comma = 1;
3847 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3850 fprintf (dump_file, "\n");
3853 fprintf (dump_file, "\n");
3856 if (!sibling_labels.is_empty ())
3858 update_alignments (sibling_labels);
3859 sibling_labels.release ();
3862 free_resource_info ();
3863 free (uid_to_ruid);
3864 crtl->dbr_scheduled_p = true;
3867 /* Run delay slot optimization. */
3868 static unsigned int
3869 rest_of_handle_delay_slots (void)
3871 if (DELAY_SLOTS)
3872 dbr_schedule (get_insns ());
3874 return 0;
3877 namespace {
3879 const pass_data pass_data_delay_slots =
3881 RTL_PASS, /* type */
3882 "dbr", /* name */
3883 OPTGROUP_NONE, /* optinfo_flags */
3884 TV_DBR_SCHED, /* tv_id */
3885 0, /* properties_required */
3886 0, /* properties_provided */
3887 0, /* properties_destroyed */
3888 0, /* todo_flags_start */
3889 0, /* todo_flags_finish */
3892 class pass_delay_slots : public rtl_opt_pass
3894 public:
3895 pass_delay_slots (gcc::context *ctxt)
3896 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3899 /* opt_pass methods: */
3900 virtual bool gate (function *);
3901 virtual unsigned int execute (function *)
3903 return rest_of_handle_delay_slots ();
3906 }; // class pass_delay_slots
3908 bool
3909 pass_delay_slots::gate (function *)
3911 /* At -O0 dataflow info isn't updated after RA. */
3912 if (DELAY_SLOTS)
3913 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3915 return false;
3918 } // anon namespace
3920 rtl_opt_pass *
3921 make_pass_delay_slots (gcc::context *ctxt)
3923 return new pass_delay_slots (ctxt);
3926 /* Machine dependent reorg pass. */
3928 namespace {
3930 const pass_data pass_data_machine_reorg =
3932 RTL_PASS, /* type */
3933 "mach", /* name */
3934 OPTGROUP_NONE, /* optinfo_flags */
3935 TV_MACH_DEP, /* tv_id */
3936 0, /* properties_required */
3937 0, /* properties_provided */
3938 0, /* properties_destroyed */
3939 0, /* todo_flags_start */
3940 0, /* todo_flags_finish */
3943 class pass_machine_reorg : public rtl_opt_pass
3945 public:
3946 pass_machine_reorg (gcc::context *ctxt)
3947 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3950 /* opt_pass methods: */
3951 virtual bool gate (function *)
3953 return targetm.machine_dependent_reorg != 0;
3956 virtual unsigned int execute (function *)
3958 targetm.machine_dependent_reorg ();
3959 return 0;
3962 }; // class pass_machine_reorg
3964 } // anon namespace
3966 rtl_opt_pass *
3967 make_pass_machine_reorg (gcc::context *ctxt)
3969 return new pass_machine_reorg (ctxt);
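/* Added illustrative commentary: a minimal, hypothetical sketch of the
   backend side of pass_machine_reorg above.  A port supplies the hook by
   defining a reorg function and initializing TARGET_MACHINE_DEPENDENT_REORG
   with it in its target definition file; all names below are illustrative
   and the block is intentionally disabled.  */
#if 0
static void
foo_machine_dependent_reorg (void)
{
  /* Walk the final insn stream and apply port-specific fixups.  */
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (JUMP_P (insn))
      ; /* ... port-specific transformations ...  */
}

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG foo_machine_dependent_reorg
#endif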