[RS6000] lqarx and stqcx. registers
[official-gcc.git] / gcc / reorg.c
bloba02141f47e75dd2b3178d8f3bdcb9f324bfca502
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2016 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "tm_p.h"
112 #include "expmed.h"
113 #include "insn-config.h"
114 #include "emit-rtl.h"
115 #include "recog.h"
116 #include "insn-attr.h"
117 #include "resource.h"
118 #include "params.h"
119 #include "tree-pass.h"
122 /* First, some functions that were used before GCC got a control flow graph.
123 These functions are now only used here in reorg.c, and have therefore
124 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
126 /* Return the last label to mark the same position as LABEL. Return LABEL
127 itself if it is null or any return rtx. */
129 static rtx
130 skip_consecutive_labels (rtx label_or_return)
132 rtx_insn *insn;
134 if (label_or_return && ANY_RETURN_P (label_or_return))
135 return label_or_return;
137 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
139 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
140 if (LABEL_P (insn))
141 label = insn;
143 return label;
146 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
147 and REG_CC_USER notes so we can find it. */
149 static void
150 link_cc0_insns (rtx_insn *insn)
152 rtx user = next_nonnote_insn (insn);
154 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
155 user = XVECEXP (PATTERN (user), 0, 0);
157 add_reg_note (user, REG_CC_SETTER, insn);
158 add_reg_note (insn, REG_CC_USER, user);
161 /* Insns which have delay slots that have not yet been filled. */
163 static struct obstack unfilled_slots_obstack;
164 static rtx *unfilled_firstobj;
166 /* Define macros to refer to the first and last slot containing unfilled
167 insns. These are used because the list may move and its address
168 should be recomputed at each use. */
170 #define unfilled_slots_base \
171 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
173 #define unfilled_slots_next \
174 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
176 /* Points to the label before the end of the function, or before a
177 return insn. */
178 static rtx_code_label *function_return_label;
179 /* Likewise for a simple_return. */
180 static rtx_code_label *function_simple_return_label;
182 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
183 not always monotonically increase. */
184 static int *uid_to_ruid;
186 /* Highest valid index in `uid_to_ruid'. */
187 static int max_uid;
189 static int stop_search_p (rtx_insn *, int);
190 static int resource_conflicts_p (struct resources *, struct resources *);
191 static int insn_references_resource_p (rtx, struct resources *, bool);
192 static int insn_sets_resource_p (rtx, struct resources *, bool);
193 static rtx_code_label *find_end_label (rtx);
194 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
195 int);
196 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
197 static rtx_insn *delete_from_delay_slot (rtx_insn *);
198 static void delete_scheduled_jump (rtx_insn *);
199 static void note_delay_statistics (int, int);
200 static int get_jump_flags (const rtx_insn *, rtx);
201 static int mostly_true_jump (rtx);
202 static rtx get_branch_condition (const rtx_insn *, rtx);
203 static int condition_dominates_p (rtx, const rtx_insn *);
204 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
205 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
206 const vec<rtx_insn *> &);
207 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
208 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
209 vec<rtx_insn *> *,
210 struct resources *,
211 struct resources *,
212 struct resources *,
213 int, int *, int *,
214 rtx *);
215 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
216 vec<rtx_insn *> *,
217 struct resources *,
218 struct resources *,
219 struct resources *,
220 int, int *, int *);
221 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
222 static rtx redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
223 static int own_thread_p (rtx, rtx, int);
224 static void update_block (rtx_insn *, rtx);
225 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
226 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
227 static void fix_reg_dead_note (rtx, rtx);
228 static void update_reg_unused_notes (rtx, rtx);
229 static void fill_simple_delay_slots (int);
230 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
231 int, int, int, int,
232 int *, vec<rtx_insn *> *);
233 static void fill_eager_delay_slots (void);
234 static void relax_delay_slots (rtx_insn *);
235 static void make_return_insns (rtx_insn *);
237 /* A wrapper around next_active_insn which takes care to return ret_rtx
238 unchanged. */
240 static rtx
241 first_active_target_insn (rtx insn)
243 if (ANY_RETURN_P (insn))
244 return insn;
245 return next_active_insn (as_a <rtx_insn *> (insn));
248 /* Return true iff INSN is a simplejump, or any kind of return insn. */
250 static bool
251 simplejump_or_return_p (rtx insn)
253 return (JUMP_P (insn)
254 && (simplejump_p (as_a <rtx_insn *> (insn))
255 || ANY_RETURN_P (PATTERN (insn))));
258 /* Return TRUE if this insn should stop the search for insn to fill delay
259 slots. LABELS_P indicates that labels should terminate the search.
260 In all cases, jumps terminate the search. */
262 static int
263 stop_search_p (rtx_insn *insn, int labels_p)
265 if (insn == 0)
266 return 1;
268 /* If the insn can throw an exception that is caught within the function,
269 it may effectively perform a jump from the viewpoint of the function.
270 Therefore act like for a jump. */
271 if (can_throw_internal (insn))
272 return 1;
274 switch (GET_CODE (insn))
276 case NOTE:
277 case CALL_INSN:
278 return 0;
280 case CODE_LABEL:
281 return labels_p;
283 case JUMP_INSN:
284 case BARRIER:
285 return 1;
287 case INSN:
288 /* OK unless it contains a delay slot or is an `asm' insn of some type.
289 We don't know anything about these. */
290 return (GET_CODE (PATTERN (insn)) == SEQUENCE
291 || GET_CODE (PATTERN (insn)) == ASM_INPUT
292 || asm_noperands (PATTERN (insn)) >= 0);
294 default:
295 gcc_unreachable ();
299 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
300 resource set contains a volatile memory reference. Otherwise, return FALSE. */
302 static int
303 resource_conflicts_p (struct resources *res1, struct resources *res2)
305 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
306 || res1->volatil || res2->volatil)
307 return 1;
309 return hard_reg_set_intersect_p (res1->regs, res2->regs);
312 /* Return TRUE if any resource marked in RES, a `struct resources', is
313 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
314 routine is using those resources.
316 We compute this by computing all the resources referenced by INSN and
317 seeing if this conflicts with RES. It might be faster to directly check
318 ourselves, and this is the way it used to work, but it means duplicating
319 a large block of complex code. */
321 static int
322 insn_references_resource_p (rtx insn, struct resources *res,
323 bool include_delayed_effects)
325 struct resources insn_res;
327 CLEAR_RESOURCE (&insn_res);
328 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
329 return resource_conflicts_p (&insn_res, res);
332 /* Return TRUE if INSN modifies resources that are marked in RES.
333 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
334 included. CC0 is only modified if it is explicitly set; see comments
335 in front of mark_set_resources for details. */
337 static int
338 insn_sets_resource_p (rtx insn, struct resources *res,
339 bool include_delayed_effects)
341 struct resources insn_sets;
343 CLEAR_RESOURCE (&insn_sets);
344 mark_set_resources (insn, &insn_sets, 0,
345 (include_delayed_effects
346 ? MARK_SRC_DEST_CALL
347 : MARK_SRC_DEST));
348 return resource_conflicts_p (&insn_sets, res);
351 /* Find a label at the end of the function or before a RETURN. If there
352 is none, try to make one. If that fails, returns 0.
354 The property of such a label is that it is placed just before the
355 epilogue or a bare RETURN insn, so that another bare RETURN can be
356 turned into a jump to the label unconditionally. In particular, the
357 label cannot be placed before a RETURN insn with a filled delay slot.
359 ??? There may be a problem with the current implementation. Suppose
360 we start with a bare RETURN insn and call find_end_label. It may set
361 function_return_label just before the RETURN. Suppose the machinery
362 is able to fill the delay slot of the RETURN insn afterwards. Then
363 function_return_label is no longer valid according to the property
364 described above and find_end_label will still return it unmodified.
365 Note that this is probably mitigated by the following observation:
366 once function_return_label is made, it is very likely the target of
367 a jump, so filling the delay slot of the RETURN will be much more
368 difficult.
369 KIND is either simple_return_rtx or ret_rtx, indicating which type of
370 return we're looking for. */
372 static rtx_code_label *
373 find_end_label (rtx kind)
375 rtx_insn *insn;
376 rtx_code_label **plabel;
378 if (kind == ret_rtx)
379 plabel = &function_return_label;
380 else
382 gcc_assert (kind == simple_return_rtx);
383 plabel = &function_simple_return_label;
386 /* If we found one previously, return it. */
387 if (*plabel)
388 return *plabel;
390 /* Otherwise, see if there is a label at the end of the function. If there
391 is, it must be that RETURN insns aren't needed, so that is our return
392 label and we don't have to do anything else. */
394 insn = get_last_insn ();
395 while (NOTE_P (insn)
396 || (NONJUMP_INSN_P (insn)
397 && (GET_CODE (PATTERN (insn)) == USE
398 || GET_CODE (PATTERN (insn)) == CLOBBER)))
399 insn = PREV_INSN (insn);
401 /* When a target threads its epilogue we might already have a
402 suitable return insn. If so put a label before it for the
403 function_return_label. */
404 if (BARRIER_P (insn)
405 && JUMP_P (PREV_INSN (insn))
406 && PATTERN (PREV_INSN (insn)) == kind)
408 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
409 rtx_code_label *label = gen_label_rtx ();
410 LABEL_NUSES (label) = 0;
412 /* Put the label before any USE insns that may precede the RETURN
413 insn. */
414 while (GET_CODE (temp) == USE)
415 temp = PREV_INSN (temp);
417 emit_label_after (label, temp);
418 *plabel = label;
421 else if (LABEL_P (insn))
422 *plabel = as_a <rtx_code_label *> (insn);
423 else
425 rtx_code_label *label = gen_label_rtx ();
426 LABEL_NUSES (label) = 0;
427 /* If the basic block reorder pass moves the return insn to
428 some other place try to locate it again and put our
429 function_return_label there. */
430 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
431 insn = PREV_INSN (insn);
432 if (insn)
434 insn = PREV_INSN (insn);
436 /* Put the label before any USE insns that may precede the
437 RETURN insn. */
438 while (GET_CODE (insn) == USE)
439 insn = PREV_INSN (insn);
441 emit_label_after (label, insn);
443 else
445 if (targetm.have_epilogue () && ! targetm.have_return ())
446 /* The RETURN insn has its delay slot filled so we cannot
447 emit the label just before it. Since we already have
448 an epilogue and cannot emit a new RETURN, we cannot
449 emit the label at all. */
450 return NULL;
452 /* Otherwise, make a new label and emit a RETURN and BARRIER,
453 if needed. */
454 emit_label (label);
455 if (targetm.have_return ())
457 /* The return we make may have delay slots too. */
458 rtx_insn *pat = targetm.gen_return ();
459 rtx_insn *insn = emit_jump_insn (pat);
460 set_return_jump_label (insn);
461 emit_barrier ();
462 if (num_delay_slots (insn) > 0)
463 obstack_ptr_grow (&unfilled_slots_obstack, insn);
466 *plabel = label;
469 /* Show one additional use for this label so it won't go away until
470 we are done. */
471 ++LABEL_NUSES (*plabel);
473 return *plabel;
476 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
477 the pattern of INSN with the SEQUENCE.
479 Returns the insn containing the SEQUENCE that replaces INSN. */
481 static rtx_insn *
482 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
484 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
485 rtvec seqv = rtvec_alloc (length + 1);
486 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
487 rtx_insn *seq_insn = make_insn_raw (seq);
489 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
490 not have a location, but one of the delayed insns does, we pick up a
491 location from there later. */
492 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
494 /* Unlink INSN from the insn chain, so that we can put it into
495 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
496 rtx_insn *after = PREV_INSN (insn);
497 remove_insn (insn);
498 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
500 /* Build our SEQUENCE and rebuild the insn chain. */
501 start_sequence ();
502 XVECEXP (seq, 0, 0) = emit_insn (insn);
504 unsigned int delay_insns = list.length ();
505 gcc_assert (delay_insns == (unsigned int) length);
506 for (unsigned int i = 0; i < delay_insns; i++)
508 rtx_insn *tem = list[i];
509 rtx note, next;
511 /* Show that this copy of the insn isn't deleted. */
512 tem->set_undeleted ();
514 /* Unlink insn from its original place, and re-emit it into
515 the sequence. */
516 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
517 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
519 /* SPARC assembler, for instance, emit warning when debug info is output
520 into the delay slot. */
521 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
522 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
523 INSN_LOCATION (tem) = 0;
525 for (note = REG_NOTES (tem); note; note = next)
527 next = XEXP (note, 1);
528 switch (REG_NOTE_KIND (note))
530 case REG_DEAD:
531 /* Remove any REG_DEAD notes because we can't rely on them now
532 that the insn has been moved. */
533 remove_note (tem, note);
534 break;
536 case REG_LABEL_OPERAND:
537 case REG_LABEL_TARGET:
538 /* Keep the label reference count up to date. */
539 if (LABEL_P (XEXP (note, 0)))
540 LABEL_NUSES (XEXP (note, 0)) ++;
541 break;
543 default:
544 break;
548 end_sequence ();
550 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
551 add_insn_after (seq_insn, after, NULL);
553 return seq_insn;
556 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
557 be in the order in which the insns are to be executed. */
559 static void
560 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
562 /* If INSN has its block number recorded, clear it since we may
563 be moving the insn to a new block. */
564 clear_hashed_info_for_insn (insn);
565 delay_list->safe_push (insn);
568 /* Delete INSN from the delay slot of the insn that it is in, which may
569 produce an insn with no delay slots. Return the new insn. */
571 static rtx_insn *
572 delete_from_delay_slot (rtx_insn *insn)
574 rtx_insn *trial, *seq_insn, *prev;
575 rtx_sequence *seq;
576 int i;
577 int had_barrier = 0;
579 /* We first must find the insn containing the SEQUENCE with INSN in its
580 delay slot. Do this by finding an insn, TRIAL, where
581 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
583 for (trial = insn;
584 PREV_INSN (NEXT_INSN (trial)) == trial;
585 trial = NEXT_INSN (trial))
588 seq_insn = PREV_INSN (NEXT_INSN (trial));
589 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
591 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
592 had_barrier = 1;
594 /* Create a delay list consisting of all the insns other than the one
595 we are deleting (unless we were the only one). */
596 auto_vec<rtx_insn *, 5> delay_list;
597 if (seq->len () > 2)
598 for (i = 1; i < seq->len (); i++)
599 if (seq->insn (i) != insn)
600 add_to_delay_list (seq->insn (i), &delay_list);
602 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
603 list, and rebuild the delay list if non-empty. */
604 prev = PREV_INSN (seq_insn);
605 trial = seq->insn (0);
606 delete_related_insns (seq_insn);
607 add_insn_after (trial, prev, NULL);
609 /* If there was a barrier after the old SEQUENCE, remit it. */
610 if (had_barrier)
611 emit_barrier_after (trial);
613 /* If there are any delay insns, remit them. Otherwise clear the
614 annul flag. */
615 if (!delay_list.is_empty ())
616 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
617 else if (JUMP_P (trial))
618 INSN_ANNULLED_BRANCH_P (trial) = 0;
620 INSN_FROM_TARGET_P (insn) = 0;
622 /* Show we need to fill this insn again. */
623 obstack_ptr_grow (&unfilled_slots_obstack, trial);
625 return trial;
628 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
629 the insn that sets CC0 for it and delete it too. */
631 static void
632 delete_scheduled_jump (rtx_insn *insn)
634 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
635 delete the insn that sets the condition code, but it is hard to find it.
636 Since this case is rare anyway, don't bother trying; there would likely
637 be other insns that became dead anyway, which we wouldn't know to
638 delete. */
640 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
642 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
644 /* If a reg-note was found, it points to an insn to set CC0. This
645 insn is in the delay list of some other insn. So delete it from
646 the delay list it was in. */
647 if (note)
649 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
650 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
651 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
653 else
655 /* The insn setting CC0 is our previous insn, but it may be in
656 a delay slot. It will be the last insn in the delay slot, if
657 it is. */
658 rtx_insn *trial = previous_insn (insn);
659 if (NOTE_P (trial))
660 trial = prev_nonnote_insn (trial);
661 if (sets_cc0_p (PATTERN (trial)) != 1
662 || FIND_REG_INC_NOTE (trial, NULL_RTX))
663 return;
664 if (PREV_INSN (NEXT_INSN (trial)) == trial)
665 delete_related_insns (trial);
666 else
667 delete_from_delay_slot (trial);
671 delete_related_insns (insn);
674 /* Counters for delay-slot filling. */
676 #define NUM_REORG_FUNCTIONS 2
677 #define MAX_DELAY_HISTOGRAM 3
678 #define MAX_REORG_PASSES 2
680 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
682 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
684 static int reorg_pass_number;
686 static void
687 note_delay_statistics (int slots_filled, int index)
689 num_insns_needing_delays[index][reorg_pass_number]++;
690 if (slots_filled > MAX_DELAY_HISTOGRAM)
691 slots_filled = MAX_DELAY_HISTOGRAM;
692 num_filled_delays[index][slots_filled][reorg_pass_number]++;
695 /* Optimize the following cases:
697 1. When a conditional branch skips over only one instruction,
698 use an annulling branch and put that insn in the delay slot.
699 Use either a branch that annuls when the condition if true or
700 invert the test with a branch that annuls when the condition is
701 false. This saves insns, since otherwise we must copy an insn
702 from the L1 target.
704 (orig) (skip) (otherwise)
705 Bcc.n L1 Bcc',a L1 Bcc,a L1'
706 insn insn insn2
707 L1: L1: L1:
708 insn2 insn2 insn2
709 insn3 insn3 L1':
710 insn3
712 2. When a conditional branch skips over only one instruction,
713 and after that, it unconditionally branches somewhere else,
714 perform the similar optimization. This saves executing the
715 second branch in the case where the inverted condition is true.
717 Bcc.n L1 Bcc',a L2
718 insn insn
719 L1: L1:
720 Bra L2 Bra L2
722 INSN is a JUMP_INSN.
724 This should be expanded to skip over N insns, where N is the number
725 of delay slots required. */
727 static void
728 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
730 rtx_insn *trial = next_nonnote_insn (insn);
731 rtx_insn *next_trial = next_active_insn (trial);
732 int flags;
734 flags = get_jump_flags (insn, JUMP_LABEL (insn));
736 if (trial == 0
737 || !NONJUMP_INSN_P (trial)
738 || GET_CODE (PATTERN (trial)) == SEQUENCE
739 || recog_memoized (trial) < 0
740 || (! eligible_for_annul_false (insn, 0, trial, flags)
741 && ! eligible_for_annul_true (insn, 0, trial, flags))
742 || RTX_FRAME_RELATED_P (trial)
743 || can_throw_internal (trial))
744 return;
746 /* There are two cases where we are just executing one insn (we assume
747 here that a branch requires only one insn; this should be generalized
748 at some point): Where the branch goes around a single insn or where
749 we have one insn followed by a branch to the same label we branch to.
750 In both of these cases, inverting the jump and annulling the delay
751 slot give the same effect in fewer insns. */
752 if (next_trial == next_active_insn (JUMP_LABEL (insn))
753 || (next_trial != 0
754 && simplejump_or_return_p (next_trial)
755 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
757 if (eligible_for_annul_false (insn, 0, trial, flags))
759 if (invert_jump (insn, JUMP_LABEL (insn), 1))
760 INSN_FROM_TARGET_P (trial) = 1;
761 else if (! eligible_for_annul_true (insn, 0, trial, flags))
762 return;
765 add_to_delay_list (trial, delay_list);
766 next_trial = next_active_insn (trial);
767 update_block (trial, trial);
768 delete_related_insns (trial);
770 /* Also, if we are targeting an unconditional
771 branch, thread our jump to the target of that branch. Don't
772 change this into a RETURN here, because it may not accept what
773 we have in the delay slot. We'll fix this up later. */
774 if (next_trial && simplejump_or_return_p (next_trial))
776 rtx target_label = JUMP_LABEL (next_trial);
777 if (ANY_RETURN_P (target_label))
778 target_label = find_end_label (target_label);
780 if (target_label)
782 /* Recompute the flags based on TARGET_LABEL since threading
783 the jump to TARGET_LABEL may change the direction of the
784 jump (which may change the circumstances in which the
785 delay slot is nullified). */
786 flags = get_jump_flags (insn, target_label);
787 if (eligible_for_annul_true (insn, 0, trial, flags))
788 reorg_redirect_jump (insn, target_label);
792 INSN_ANNULLED_BRANCH_P (insn) = 1;
796 /* Encode and return branch direction and prediction information for
797 INSN assuming it will jump to LABEL.
799 Non conditional branches return no direction information and
800 are predicted as very likely taken. */
802 static int
803 get_jump_flags (const rtx_insn *insn, rtx label)
805 int flags;
807 /* get_jump_flags can be passed any insn with delay slots, these may
808 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
809 direction information, and only if they are conditional jumps.
811 If LABEL is a return, then there is no way to determine the branch
812 direction. */
813 if (JUMP_P (insn)
814 && (condjump_p (insn) || condjump_in_parallel_p (insn))
815 && !ANY_RETURN_P (label)
816 && INSN_UID (insn) <= max_uid
817 && INSN_UID (label) <= max_uid)
818 flags
819 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
820 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
821 /* No valid direction information. */
822 else
823 flags = 0;
825 return flags;
828 /* Return truth value of the statement that this branch
829 is mostly taken. If we think that the branch is extremely likely
830 to be taken, we return 2. If the branch is slightly more likely to be
831 taken, return 1. If the branch is slightly less likely to be taken,
832 return 0 and if the branch is highly unlikely to be taken, return -1. */
834 static int
835 mostly_true_jump (rtx jump_insn)
837 /* If branch probabilities are available, then use that number since it
838 always gives a correct answer. */
839 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
840 if (note)
842 int prob = XINT (note, 0);
844 if (prob >= REG_BR_PROB_BASE * 9 / 10)
845 return 2;
846 else if (prob >= REG_BR_PROB_BASE / 2)
847 return 1;
848 else if (prob >= REG_BR_PROB_BASE / 10)
849 return 0;
850 else
851 return -1;
854 /* If there is no note, assume branches are not taken.
855 This should be rare. */
856 return 0;
859 /* Return the condition under which INSN will branch to TARGET. If TARGET
860 is zero, return the condition under which INSN will return. If INSN is
861 an unconditional branch, return const_true_rtx. If INSN isn't a simple
862 type of jump, or it doesn't go to TARGET, return 0. */
864 static rtx
865 get_branch_condition (const rtx_insn *insn, rtx target)
867 rtx pat = PATTERN (insn);
868 rtx src;
870 if (condjump_in_parallel_p (insn))
871 pat = XVECEXP (pat, 0, 0);
873 if (ANY_RETURN_P (pat) && pat == target)
874 return const_true_rtx;
876 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
877 return 0;
879 src = SET_SRC (pat);
880 if (GET_CODE (src) == LABEL_REF && LABEL_REF_LABEL (src) == target)
881 return const_true_rtx;
883 else if (GET_CODE (src) == IF_THEN_ELSE
884 && XEXP (src, 2) == pc_rtx
885 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
886 && LABEL_REF_LABEL (XEXP (src, 1)) == target)
887 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
888 return XEXP (src, 0);
890 else if (GET_CODE (src) == IF_THEN_ELSE
891 && XEXP (src, 1) == pc_rtx
892 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
893 && LABEL_REF_LABEL (XEXP (src, 2)) == target)
894 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
896 enum rtx_code rev;
897 rev = reversed_comparison_code (XEXP (src, 0), insn);
898 if (rev != UNKNOWN)
899 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
900 XEXP (XEXP (src, 0), 0),
901 XEXP (XEXP (src, 0), 1));
904 return 0;
907 /* Return nonzero if CONDITION is more strict than the condition of
908 INSN, i.e., if INSN will always branch if CONDITION is true. */
910 static int
911 condition_dominates_p (rtx condition, const rtx_insn *insn)
913 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
914 enum rtx_code code = GET_CODE (condition);
915 enum rtx_code other_code;
917 if (rtx_equal_p (condition, other_condition)
918 || other_condition == const_true_rtx)
919 return 1;
921 else if (condition == const_true_rtx || other_condition == 0)
922 return 0;
924 other_code = GET_CODE (other_condition);
925 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
926 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
927 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
928 return 0;
930 return comparison_dominates_p (code, other_code);
933 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
934 any insns already in the delay slot of JUMP. */
936 static int
937 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
939 int flags, i;
940 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
942 /* Make sure all the delay slots of this jump would still
943 be valid after threading the jump. If they are still
944 valid, then return nonzero. */
946 flags = get_jump_flags (jump, newlabel);
947 for (i = 1; i < pat->len (); i++)
948 if (! (
949 #if ANNUL_IFFALSE_SLOTS
950 (INSN_ANNULLED_BRANCH_P (jump)
951 && INSN_FROM_TARGET_P (pat->insn (i)))
952 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
953 #endif
954 #if ANNUL_IFTRUE_SLOTS
955 (INSN_ANNULLED_BRANCH_P (jump)
956 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
957 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
958 #endif
959 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
960 break;
962 return (i == pat->len ());
965 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
966 any insns we wish to place in the delay slot of JUMP. */
968 static int
969 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
970 const vec<rtx_insn *> &delay_list)
972 /* Make sure all the insns in DELAY_LIST would still be
973 valid after threading the jump. If they are still
974 valid, then return nonzero. */
976 int flags = get_jump_flags (jump, newlabel);
977 unsigned int delay_insns = delay_list.length ();
978 unsigned int i = 0;
979 for (; i < delay_insns; i++)
980 if (! (
981 #if ANNUL_IFFALSE_SLOTS
982 (INSN_ANNULLED_BRANCH_P (jump)
983 && INSN_FROM_TARGET_P (delay_list[i]))
984 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
985 #endif
986 #if ANNUL_IFTRUE_SLOTS
987 (INSN_ANNULLED_BRANCH_P (jump)
988 && ! INSN_FROM_TARGET_P (delay_list[i]))
989 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
990 #endif
991 eligible_for_delay (jump, i, delay_list[i], flags)))
992 break;
994 return i == delay_insns;
997 /* DELAY_LIST is a list of insns that have already been placed into delay
998 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
999 If not, return 0; otherwise return 1. */
1001 static int
1002 check_annul_list_true_false (int annul_true_p,
1003 const vec<rtx_insn *> &delay_list)
1005 rtx_insn *trial;
1006 unsigned int i;
1007 FOR_EACH_VEC_ELT (delay_list, i, trial)
1008 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1009 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1010 return 0;
1012 return 1;
1015 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1016 the condition tested by INSN is CONDITION and the resources shown in
1017 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1018 from SEQ's delay list, in addition to whatever insns it may execute
1019 (in DELAY_LIST). SETS and NEEDED are denote resources already set and
1020 needed while searching for delay slot insns. Return the concatenated
1021 delay list if possible, otherwise, return 0.
1023 SLOTS_TO_FILL is the total number of slots required by INSN, and
1024 PSLOTS_FILLED points to the number filled so far (also the number of
1025 insns in DELAY_LIST). It is updated with the number that have been
1026 filled from the SEQUENCE, if any.
1028 PANNUL_P points to a nonzero value if we already know that we need
1029 to annul INSN. If this routine determines that annulling is needed,
1030 it may set that value nonzero.
1032 PNEW_THREAD points to a location that is to receive the place at which
1033 execution should continue. */
1035 static void
1036 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1037 vec<rtx_insn *> *delay_list, resources *sets,
1038 struct resources *needed,
1039 struct resources *other_needed,
1040 int slots_to_fill, int *pslots_filled,
1041 int *pannul_p, rtx *pnew_thread)
1043 int slots_remaining = slots_to_fill - *pslots_filled;
1044 int total_slots_filled = *pslots_filled;
1045 auto_vec<rtx_insn *, 5> new_delay_list;
1046 int must_annul = *pannul_p;
1047 int used_annul = 0;
1048 int i;
1049 struct resources cc_set;
1050 bool *redundant;
1052 /* We can't do anything if there are more delay slots in SEQ than we
1053 can handle, or if we don't know that it will be a taken branch.
1054 We know that it will be a taken branch if it is either an unconditional
1055 branch or a conditional branch with a stricter branch condition.
1057 Also, exit if the branch has more than one set, since then it is computing
1058 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1059 ??? It may be possible to move other sets into INSN in addition to
1060 moving the instructions in the delay slots.
1062 We can not steal the delay list if one of the instructions in the
1063 current delay_list modifies the condition codes and the jump in the
1064 sequence is a conditional jump. We can not do this because we can
1065 not change the direction of the jump because the condition codes
1066 will effect the direction of the jump in the sequence. */
1068 CLEAR_RESOURCE (&cc_set);
1070 rtx_insn *trial;
1071 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1073 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1074 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1075 return;
1078 if (XVECLEN (seq, 0) - 1 > slots_remaining
1079 || ! condition_dominates_p (condition, seq->insn (0))
1080 || ! single_set (seq->insn (0)))
1081 return;
1083 /* On some targets, branches with delay slots can have a limited
1084 displacement. Give the back end a chance to tell us we can't do
1085 this. */
1086 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1087 return;
1089 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1090 for (i = 1; i < seq->len (); i++)
1092 rtx_insn *trial = seq->insn (i);
1093 int flags;
1095 if (insn_references_resource_p (trial, sets, false)
1096 || insn_sets_resource_p (trial, needed, false)
1097 || insn_sets_resource_p (trial, sets, false)
1098 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1099 delay list. */
1100 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1101 /* If TRIAL is from the fallthrough code of an annulled branch insn
1102 in SEQ, we cannot use it. */
1103 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1104 && ! INSN_FROM_TARGET_P (trial)))
1105 return;
1107 /* If this insn was already done (usually in a previous delay slot),
1108 pretend we put it in our delay slot. */
1109 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1110 if (redundant[i])
1111 continue;
1113 /* We will end up re-vectoring this branch, so compute flags
1114 based on jumping to the new label. */
1115 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1117 if (! must_annul
1118 && ((condition == const_true_rtx
1119 || (! insn_sets_resource_p (trial, other_needed, false)
1120 && ! may_trap_or_fault_p (PATTERN (trial)))))
1121 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1122 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1123 && (must_annul = 1,
1124 check_annul_list_true_false (0, *delay_list)
1125 && check_annul_list_true_false (0, new_delay_list)
1126 && eligible_for_annul_false (insn, total_slots_filled,
1127 trial, flags)))
1129 if (must_annul)
1131 /* Frame related instructions cannot go into annulled delay
1132 slots, it messes up the dwarf info. */
1133 if (RTX_FRAME_RELATED_P (trial))
1134 return;
1135 used_annul = 1;
1137 rtx_insn *temp = copy_delay_slot_insn (trial);
1138 INSN_FROM_TARGET_P (temp) = 1;
1139 add_to_delay_list (temp, &new_delay_list);
1140 total_slots_filled++;
1142 if (--slots_remaining == 0)
1143 break;
1145 else
1146 return;
1149 /* Record the effect of the instructions that were redundant and which
1150 we therefore decided not to copy. */
1151 for (i = 1; i < seq->len (); i++)
1152 if (redundant[i])
1153 update_block (seq->insn (i), insn);
1155 /* Show the place to which we will be branching. */
1156 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1158 /* Add any new insns to the delay list and update the count of the
1159 number of slots filled. */
1160 *pslots_filled = total_slots_filled;
1161 if (used_annul)
1162 *pannul_p = 1;
1164 rtx_insn *temp;
1165 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1166 add_to_delay_list (temp, delay_list);
1169 /* Similar to steal_delay_list_from_target except that SEQ is on the
1170 fallthrough path of INSN. Here we only do something if the delay insn
1171 of SEQ is an unconditional branch. In that case we steal its delay slot
1172 for INSN since unconditional branches are much easier to fill. */
1174 static void
1175 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1176 rtx_sequence *seq,
1177 vec<rtx_insn *> *delay_list,
1178 struct resources *sets,
1179 struct resources *needed,
1180 struct resources *other_needed,
1181 int slots_to_fill, int *pslots_filled,
1182 int *pannul_p)
1184 int i;
1185 int flags;
1186 int must_annul = *pannul_p;
1187 int used_annul = 0;
1189 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1191 /* We can't do anything if SEQ's delay insn isn't an
1192 unconditional branch. */
1194 if (! simplejump_or_return_p (seq->insn (0)))
1195 return;
1197 for (i = 1; i < seq->len (); i++)
1199 rtx_insn *trial = seq->insn (i);
1201 /* If TRIAL sets CC0, stealing it will move it too far from the use
1202 of CC0. */
1203 if (insn_references_resource_p (trial, sets, false)
1204 || insn_sets_resource_p (trial, needed, false)
1205 || insn_sets_resource_p (trial, sets, false)
1206 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1208 break;
1210 /* If this insn was already done, we don't need it. */
1211 if (redundant_insn (trial, insn, *delay_list))
1213 update_block (trial, insn);
1214 delete_from_delay_slot (trial);
1215 continue;
1218 if (! must_annul
1219 && ((condition == const_true_rtx
1220 || (! insn_sets_resource_p (trial, other_needed, false)
1221 && ! may_trap_or_fault_p (PATTERN (trial)))))
1222 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1223 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1224 check_annul_list_true_false (1, *delay_list)
1225 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1227 if (must_annul)
1228 used_annul = 1;
1229 delete_from_delay_slot (trial);
1230 add_to_delay_list (trial, delay_list);
1232 if (++(*pslots_filled) == slots_to_fill)
1233 break;
1235 else
1236 break;
1239 if (used_annul)
1240 *pannul_p = 1;
1243 /* Try merging insns starting at THREAD which match exactly the insns in
1244 INSN's delay list.
1246 If all insns were matched and the insn was previously annulling, the
1247 annul bit will be cleared.
1249 For each insn that is merged, if the branch is or will be non-annulling,
1250 we delete the merged insn. */
1252 static void
1253 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1255 rtx_insn *trial, *next_trial;
1256 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1257 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1258 int slot_number = 1;
1259 int num_slots = XVECLEN (PATTERN (insn), 0);
1260 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1261 struct resources set, needed, modified;
1262 rtx_insn_list *merged_insns = 0;
1263 int i, j;
1264 int flags;
1266 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1268 CLEAR_RESOURCE (&needed);
1269 CLEAR_RESOURCE (&set);
1271 /* If this is not an annulling branch, take into account anything needed in
1272 INSN's delay slot. This prevents two increments from being incorrectly
1273 folded into one. If we are annulling, this would be the correct
1274 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1275 will essentially disable this optimization. This method is somewhat of
1276 a kludge, but I don't see a better way.) */
1277 if (! annul_p)
1278 for (i = 1 ; i < num_slots; i++)
1279 if (XVECEXP (PATTERN (insn), 0, i))
1280 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1281 true);
1283 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1285 rtx pat = PATTERN (trial);
1286 rtx oldtrial = trial;
1288 next_trial = next_nonnote_insn (trial);
1290 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1291 if (NONJUMP_INSN_P (trial)
1292 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1293 continue;
1295 if (GET_CODE (next_to_match) == GET_CODE (trial)
1296 /* We can't share an insn that sets cc0. */
1297 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1298 && ! insn_references_resource_p (trial, &set, true)
1299 && ! insn_sets_resource_p (trial, &set, true)
1300 && ! insn_sets_resource_p (trial, &needed, true)
1301 && (trial = try_split (pat, trial, 0)) != 0
1302 /* Update next_trial, in case try_split succeeded. */
1303 && (next_trial = next_nonnote_insn (trial))
1304 /* Likewise THREAD. */
1305 && (thread = oldtrial == thread ? trial : thread)
1306 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1307 /* Have to test this condition if annul condition is different
1308 from (and less restrictive than) non-annulling one. */
1309 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1312 if (! annul_p)
1314 update_block (trial, thread);
1315 if (trial == thread)
1316 thread = next_active_insn (thread);
1318 delete_related_insns (trial);
1319 INSN_FROM_TARGET_P (next_to_match) = 0;
1321 else
1322 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1324 if (++slot_number == num_slots)
1325 break;
1327 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1330 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1331 mark_referenced_resources (trial, &needed, true);
1334 /* See if we stopped on a filled insn. If we did, try to see if its
1335 delay slots match. */
1336 if (slot_number != num_slots
1337 && trial && NONJUMP_INSN_P (trial)
1338 && GET_CODE (PATTERN (trial)) == SEQUENCE
1339 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1340 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1342 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1343 rtx filled_insn = XVECEXP (pat, 0, 0);
1345 /* Account for resources set/needed by the filled insn. */
1346 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1347 mark_referenced_resources (filled_insn, &needed, true);
1349 for (i = 1; i < pat->len (); i++)
1351 rtx_insn *dtrial = pat->insn (i);
1353 CLEAR_RESOURCE (&modified);
1354 /* Account for resources set by the insn following NEXT_TO_MATCH
1355 inside INSN's delay list. */
1356 for (j = 1; slot_number + j < num_slots; j++)
1357 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1358 &modified, 0, MARK_SRC_DEST_CALL);
1359 /* Account for resources set by the insn before DTRIAL and inside
1360 TRIAL's delay list. */
1361 for (j = 1; j < i; j++)
1362 mark_set_resources (XVECEXP (pat, 0, j),
1363 &modified, 0, MARK_SRC_DEST_CALL);
1364 if (! insn_references_resource_p (dtrial, &set, true)
1365 && ! insn_sets_resource_p (dtrial, &set, true)
1366 && ! insn_sets_resource_p (dtrial, &needed, true)
1367 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1368 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1369 /* Check that DTRIAL and NEXT_TO_MATCH does not reference a
1370 resource modified between them (only dtrial is checked because
1371 next_to_match and dtrial shall to be equal in order to hit
1372 this line) */
1373 && ! insn_references_resource_p (dtrial, &modified, true)
1374 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1376 if (! annul_p)
1378 rtx_insn *new_rtx;
1380 update_block (dtrial, thread);
1381 new_rtx = delete_from_delay_slot (dtrial);
1382 if (thread->deleted ())
1383 thread = new_rtx;
1384 INSN_FROM_TARGET_P (next_to_match) = 0;
1386 else
1387 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1388 merged_insns);
1390 if (++slot_number == num_slots)
1391 break;
1393 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1395 else
1397 /* Keep track of the set/referenced resources for the delay
1398 slots of any trial insns we encounter. */
1399 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1400 mark_referenced_resources (dtrial, &needed, true);
1405 /* If all insns in the delay slot have been matched and we were previously
1406 annulling the branch, we need not any more. In that case delete all the
1407 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1408 the delay list so that we know that it isn't only being used at the
1409 target. */
1410 if (slot_number == num_slots && annul_p)
1412 for (; merged_insns; merged_insns = merged_insns->next ())
1414 if (GET_MODE (merged_insns) == SImode)
1416 rtx_insn *new_rtx;
1418 update_block (merged_insns->insn (), thread);
1419 new_rtx = delete_from_delay_slot (merged_insns->insn ());
1420 if (thread->deleted ())
1421 thread = new_rtx;
1423 else
1425 update_block (merged_insns->insn (), thread);
1426 delete_related_insns (merged_insns->insn ());
1430 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1432 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1433 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1437 /* See if INSN is redundant with an insn in front of TARGET. Often this
1438 is called when INSN is a candidate for a delay slot of TARGET.
1439 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1440 of INSN. Often INSN will be redundant with an insn in a delay slot of
1441 some previous insn. This happens when we have a series of branches to the
1442 same label; in that case the first insn at the target might want to go
1443 into each of the delay slots.
1445 If we are not careful, this routine can take up a significant fraction
1446 of the total compilation time (4%), but only wins rarely. Hence we
1447 speed this routine up by making two passes. The first pass goes back
1448 until it hits a label and sees if it finds an insn with an identical
1449 pattern. Only in this (relatively rare) event does it check for
1450 data conflicts.
1452 We do not split insns we encounter. This could cause us not to find a
1453 redundant insn, but the cost of splitting seems greater than the possible
1454 gain in rare cases. */
1456 static rtx
1457 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1459 rtx target_main = target;
1460 rtx ipat = PATTERN (insn);
1461 rtx_insn *trial;
1462 rtx pat;
1463 struct resources needed, set;
1464 int i;
1465 unsigned insns_to_search;
1467 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1468 are allowed to not actually assign to such a register. */
1469 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1470 return 0;
1472 /* Scan backwards looking for a match. */
1473 for (trial = PREV_INSN (target),
1474 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1475 trial && insns_to_search > 0;
1476 trial = PREV_INSN (trial))
1478 /* (use (insn))s can come immediately after a barrier if the
1479 label that used to precede them has been deleted as dead.
1480 See delete_related_insns. */
1481 if (LABEL_P (trial) || BARRIER_P (trial))
1482 return 0;
1484 if (!INSN_P (trial))
1485 continue;
1486 --insns_to_search;
1488 pat = PATTERN (trial);
1489 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1490 continue;
1492 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1494 /* Stop for a CALL and its delay slots because it is difficult to
1495 track its resource needs correctly. */
1496 if (CALL_P (seq->element (0)))
1497 return 0;
1499 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1500 slots because it is difficult to track its resource needs
1501 correctly. */
1503 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1504 return 0;
1506 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1507 return 0;
1509 /* See if any of the insns in the delay slot match, updating
1510 resource requirements as we go. */
1511 for (i = seq->len () - 1; i > 0; i--)
1512 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1513 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1514 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1515 break;
1517 /* If found a match, exit this loop early. */
1518 if (i > 0)
1519 break;
1522 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1523 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1524 break;
1527 /* If we didn't find an insn that matches, return 0. */
1528 if (trial == 0)
1529 return 0;
1531 /* See what resources this insn sets and needs. If they overlap, or
1532 if this insn references CC0, it can't be redundant. */
1534 CLEAR_RESOURCE (&needed);
1535 CLEAR_RESOURCE (&set);
1536 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1537 mark_referenced_resources (insn, &needed, true);
1539 /* If TARGET is a SEQUENCE, get the main insn. */
1540 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1541 target_main = XVECEXP (PATTERN (target), 0, 0);
1543 if (resource_conflicts_p (&needed, &set)
1544 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1545 /* The insn requiring the delay may not set anything needed or set by
1546 INSN. */
1547 || insn_sets_resource_p (target_main, &needed, true)
1548 || insn_sets_resource_p (target_main, &set, true))
1549 return 0;
1551 /* Insns we pass may not set either NEEDED or SET, so merge them for
1552 simpler tests. */
1553 needed.memory |= set.memory;
1554 IOR_HARD_REG_SET (needed.regs, set.regs);
1556 /* This insn isn't redundant if it conflicts with an insn that either is
1557 or will be in a delay slot of TARGET. */
1559 unsigned int j;
1560 rtx_insn *temp;
1561 FOR_EACH_VEC_ELT (delay_list, j, temp)
1562 if (insn_sets_resource_p (temp, &needed, true))
1563 return 0;
1565 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1566 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1567 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1568 true))
1569 return 0;
1571 /* Scan backwards until we reach a label or an insn that uses something
1572 INSN sets or sets something insn uses or sets. */
1574 for (trial = PREV_INSN (target),
1575 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1576 trial && !LABEL_P (trial) && insns_to_search > 0;
1577 trial = PREV_INSN (trial))
1579 if (!INSN_P (trial))
1580 continue;
1581 --insns_to_search;
1583 pat = PATTERN (trial);
1584 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1585 continue;
1587 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1589 bool annul_p = false;
1590 rtx_insn *control = seq->insn (0);
1592 /* If this is a CALL_INSN and its delay slots, it is hard to track
1593 the resource needs properly, so give up. */
1594 if (CALL_P (control))
1595 return 0;
1597 /* If this is an INSN or JUMP_INSN with delayed effects, it
1598 is hard to track the resource needs properly, so give up. */
1600 if (INSN_SETS_ARE_DELAYED (control))
1601 return 0;
1603 if (INSN_REFERENCES_ARE_DELAYED (control))
1604 return 0;
1606 if (JUMP_P (control))
1607 annul_p = INSN_ANNULLED_BRANCH_P (control);
1609 /* See if any of the insns in the delay slot match, updating
1610 resource requirements as we go. */
1611 for (i = seq->len () - 1; i > 0; i--)
1613 rtx candidate = seq->element (i);
1615 /* If an insn will be annulled if the branch is false, it isn't
1616 considered as a possible duplicate insn. */
1617 if (rtx_equal_p (PATTERN (candidate), ipat)
1618 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1620 /* Show that this insn will be used in the sequel. */
1621 INSN_FROM_TARGET_P (candidate) = 0;
1622 return candidate;
1625 /* Unless this is an annulled insn from the target of a branch,
1626 we must stop if it sets anything needed or set by INSN. */
1627 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1628 && insn_sets_resource_p (candidate, &needed, true))
1629 return 0;
1632 /* If the insn requiring the delay slot conflicts with INSN, we
1633 must stop. */
1634 if (insn_sets_resource_p (control, &needed, true))
1635 return 0;
1637 else
1639 /* See if TRIAL is the same as INSN. */
1640 pat = PATTERN (trial);
1641 if (rtx_equal_p (pat, ipat))
1642 return trial;
1644 /* Can't go any further if TRIAL conflicts with INSN. */
1645 if (insn_sets_resource_p (trial, &needed, true))
1646 return 0;
1650 return 0;
1653 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1654 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1655 is nonzero, we are allowed to fall into this thread; otherwise, we are
1656 not.
1658 If LABEL is used more than one or we pass a label other than LABEL before
1659 finding an active insn, we do not own this thread. */
1661 static int
1662 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1664 rtx_insn *active_insn;
1665 rtx_insn *insn;
1667 /* We don't own the function end. */
1668 if (thread == 0 || ANY_RETURN_P (thread))
1669 return 0;
1671 /* We have a non-NULL insn. */
1672 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1674 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1675 active_insn = next_active_insn (PREV_INSN (thread_insn));
1677 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1678 if (LABEL_P (insn)
1679 && (insn != label || LABEL_NUSES (insn) != 1))
1680 return 0;
1682 if (allow_fallthrough)
1683 return 1;
1685 /* Ensure that we reach a BARRIER before any insn or label. */
1686 for (insn = prev_nonnote_insn (thread_insn);
1687 insn == 0 || !BARRIER_P (insn);
1688 insn = prev_nonnote_insn (insn))
1689 if (insn == 0
1690 || LABEL_P (insn)
1691 || (NONJUMP_INSN_P (insn)
1692 && GET_CODE (PATTERN (insn)) != USE
1693 && GET_CODE (PATTERN (insn)) != CLOBBER))
1694 return 0;
1696 return 1;
1699 /* Called when INSN is being moved from a location near the target of a jump.
1700 We leave a marker of the form (use (INSN)) immediately in front
1701 of WHERE for mark_target_live_regs. These markers will be deleted when
1702 reorg finishes.
1704 We used to try to update the live status of registers if WHERE is at
1705 the start of a basic block, but that can't work since we may remove a
1706 BARRIER in relax_delay_slots. */
1708 static void
1709 update_block (rtx_insn *insn, rtx where)
1711 /* Ignore if this was in a delay slot and it came from the target of
1712 a branch. */
1713 if (INSN_FROM_TARGET_P (insn))
1714 return;
1716 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1718 /* INSN might be making a value live in a block where it didn't use to
1719 be. So recompute liveness information for this block. */
1721 incr_ticks_for_insn (insn);
1724 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1725 the basic block containing the jump. */
1727 static int
1728 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1730 incr_ticks_for_insn (jump);
1731 return redirect_jump (jump, nlabel, 1);
1734 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1735 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1736 that reference values used in INSN. If we find one, then we move the
1737 REG_DEAD note to INSN.
1739 This is needed to handle the case where a later insn (after INSN) has a
1740 REG_DEAD note for a register used by INSN, and this later insn subsequently
1741 gets moved before a CODE_LABEL because it is a redundant insn. In this
1742 case, mark_target_live_regs may be confused into thinking the register
1743 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1745 static void
1746 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1748 rtx link, next;
1749 rtx_insn *p;
1751 for (p = next_nonnote_insn (insn); p != delayed_insn;
1752 p = next_nonnote_insn (p))
1753 for (link = REG_NOTES (p); link; link = next)
1755 next = XEXP (link, 1);
1757 if (REG_NOTE_KIND (link) != REG_DEAD
1758 || !REG_P (XEXP (link, 0)))
1759 continue;
1761 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1763 /* Move the REG_DEAD note from P to INSN. */
1764 remove_note (p, link);
1765 XEXP (link, 1) = REG_NOTES (insn);
1766 REG_NOTES (insn) = link;
1771 /* Called when an insn redundant with start_insn is deleted. If there
1772 is a REG_DEAD note for the target of start_insn between start_insn
1773 and stop_insn, then the REG_DEAD note needs to be deleted since the
1774 value no longer dies there.
1776 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1777 confused into thinking the register is dead. */
1779 static void
1780 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1782 rtx link, next;
1783 rtx_insn *p;
1785 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1786 p = next_nonnote_insn (p))
1787 for (link = REG_NOTES (p); link; link = next)
1789 next = XEXP (link, 1);
1791 if (REG_NOTE_KIND (link) != REG_DEAD
1792 || !REG_P (XEXP (link, 0)))
1793 continue;
1795 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1797 remove_note (p, link);
1798 return;
1803 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1805 This handles the case of udivmodXi4 instructions which optimize their
1806 output depending on whether any REG_UNUSED notes are present.
1807 we must make sure that INSN calculates as many results as REDUNDANT_INSN
1808 does. */
1810 static void
1811 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1813 rtx link, next;
1815 for (link = REG_NOTES (insn); link; link = next)
1817 next = XEXP (link, 1);
1819 if (REG_NOTE_KIND (link) != REG_UNUSED
1820 || !REG_P (XEXP (link, 0)))
1821 continue;
1823 if (! find_regno_note (redundant_insn, REG_UNUSED,
1824 REGNO (XEXP (link, 0))))
1825 remove_note (insn, link);
1829 static vec <rtx> sibling_labels;
1831 /* Return the label before INSN, or put a new label there. If SIBLING is
1832 non-zero, it is another label associated with the new label (if any),
1833 typically the former target of the jump that will be redirected to
1834 the new label. */
1836 static rtx_insn *
1837 get_label_before (rtx_insn *insn, rtx sibling)
1839 rtx_insn *label;
1841 /* Find an existing label at this point
1842 or make a new one if there is none. */
1843 label = prev_nonnote_insn (insn);
1845 if (label == 0 || !LABEL_P (label))
1847 rtx_insn *prev = PREV_INSN (insn);
1849 label = gen_label_rtx ();
1850 emit_label_after (label, prev);
1851 LABEL_NUSES (label) = 0;
1852 if (sibling)
1854 sibling_labels.safe_push (label);
1855 sibling_labels.safe_push (sibling);
1858 return label;
1861 /* Scan a function looking for insns that need a delay slot and find insns to
1862 put into the delay slot.
1864 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1865 as calls). We do these first since we don't want jump insns (that are
1866 easier to fill) to get the only insns that could be used for non-jump insns.
1867 When it is zero, only try to fill JUMP_INSNs.
1869 When slots are filled in this manner, the insns (including the
1870 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1871 it is possible to tell whether a delay slot has really been filled
1872 or not. `final' knows how to deal with this, by communicating
1873 through FINAL_SEQUENCE. */
1875 static void
1876 fill_simple_delay_slots (int non_jumps_p)
1878 rtx_insn *insn, *trial, *next_trial;
1879 rtx pat;
1880 int i;
1881 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1882 struct resources needed, set;
1883 int slots_to_fill, slots_filled;
1884 auto_vec<rtx_insn *, 5> delay_list;
1886 for (i = 0; i < num_unfilled_slots; i++)
1888 int flags;
1889 /* Get the next insn to fill. If it has already had any slots assigned,
1890 we can't do anything with it. Maybe we'll improve this later. */
1892 insn = unfilled_slots_base[i];
1893 if (insn == 0
1894 || insn->deleted ()
1895 || (NONJUMP_INSN_P (insn)
1896 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1897 || (JUMP_P (insn) && non_jumps_p)
1898 || (!JUMP_P (insn) && ! non_jumps_p))
1899 continue;
1901 /* It may have been that this insn used to need delay slots, but
1902 now doesn't; ignore in that case. This can happen, for example,
1903 on the HP PA RISC, where the number of delay slots depends on
1904 what insns are nearby. */
1905 slots_to_fill = num_delay_slots (insn);
1907 /* Some machine description have defined instructions to have
1908 delay slots only in certain circumstances which may depend on
1909 nearby insns (which change due to reorg's actions).
1911 For example, the PA port normally has delay slots for unconditional
1912 jumps.
1914 However, the PA port claims such jumps do not have a delay slot
1915 if they are immediate successors of certain CALL_INSNs. This
1916 allows the port to favor filling the delay slot of the call with
1917 the unconditional jump. */
1918 if (slots_to_fill == 0)
1919 continue;
1921 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1922 says how many. After initialization, first try optimizing
1924 call _foo call _foo
1925 nop add %o7,.-L1,%o7
1926 b,a L1
1929 If this case applies, the delay slot of the call is filled with
1930 the unconditional jump. This is done first to avoid having the
1931 delay slot of the call filled in the backward scan. Also, since
1932 the unconditional jump is likely to also have a delay slot, that
1933 insn must exist when it is subsequently scanned.
1935 This is tried on each insn with delay slots as some machines
1936 have insns which perform calls, but are not represented as
1937 CALL_INSNs. */
1939 slots_filled = 0;
1940 delay_list.truncate (0);
1942 if (JUMP_P (insn))
1943 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1944 else
1945 flags = get_jump_flags (insn, NULL_RTX);
1947 if ((trial = next_active_insn (insn))
1948 && JUMP_P (trial)
1949 && simplejump_p (trial)
1950 && eligible_for_delay (insn, slots_filled, trial, flags)
1951 && no_labels_between_p (insn, trial)
1952 && ! can_throw_internal (trial))
1954 rtx_insn **tmp;
1955 slots_filled++;
1956 add_to_delay_list (trial, &delay_list);
1958 /* TRIAL may have had its delay slot filled, then unfilled. When
1959 the delay slot is unfilled, TRIAL is placed back on the unfilled
1960 slots obstack. Unfortunately, it is placed on the end of the
1961 obstack, not in its original location. Therefore, we must search
1962 from entry i + 1 to the end of the unfilled slots obstack to
1963 try and find TRIAL. */
1964 tmp = &unfilled_slots_base[i + 1];
1965 while (*tmp != trial && tmp != unfilled_slots_next)
1966 tmp++;
1968 /* Remove the unconditional jump from consideration for delay slot
1969 filling and unthread it. */
1970 if (*tmp == trial)
1971 *tmp = 0;
1973 rtx_insn *next = NEXT_INSN (trial);
1974 rtx_insn *prev = PREV_INSN (trial);
1975 if (prev)
1976 SET_NEXT_INSN (prev) = next;
1977 if (next)
1978 SET_PREV_INSN (next) = prev;
1982 /* Now, scan backwards from the insn to search for a potential
1983 delay-slot candidate. Stop searching when a label or jump is hit.
1985 For each candidate, if it is to go into the delay slot (moved
1986 forward in execution sequence), it must not need or set any resources
1987 that were set by later insns and must not set any resources that
1988 are needed for those insns.
1990 The delay slot insn itself sets resources unless it is a call
1991 (in which case the called routine, not the insn itself, is doing
1992 the setting). */
1994 if (slots_filled < slots_to_fill)
1996 /* If the flags register is dead after the insn, then we want to be
1997 able to accept a candidate that clobbers it. For this purpose,
1998 we need to filter the flags register during life analysis, so
1999 that it doesn't create RAW and WAW dependencies, while still
2000 creating the necessary WAR dependencies. */
2001 bool filter_flags
2002 = (slots_to_fill == 1
2003 && targetm.flags_regnum != INVALID_REGNUM
2004 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2005 struct resources fset;
2006 CLEAR_RESOURCE (&needed);
2007 CLEAR_RESOURCE (&set);
2008 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2009 if (filter_flags)
2011 CLEAR_RESOURCE (&fset);
2012 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2014 mark_referenced_resources (insn, &needed, false);
2016 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2017 trial = next_trial)
2019 next_trial = prev_nonnote_insn (trial);
2021 /* This must be an INSN or CALL_INSN. */
2022 pat = PATTERN (trial);
2024 /* Stand-alone USE and CLOBBER are just for flow. */
2025 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2026 continue;
2028 /* Check for resource conflict first, to avoid unnecessary
2029 splitting. */
2030 if (! insn_references_resource_p (trial, &set, true)
2031 && ! insn_sets_resource_p (trial,
2032 filter_flags ? &fset : &set,
2033 true)
2034 && ! insn_sets_resource_p (trial, &needed, true)
2035 /* Can't separate set of cc0 from its use. */
2036 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2037 && ! can_throw_internal (trial))
2039 trial = try_split (pat, trial, 1);
2040 next_trial = prev_nonnote_insn (trial);
2041 if (eligible_for_delay (insn, slots_filled, trial, flags))
2043 /* In this case, we are searching backward, so if we
2044 find insns to put on the delay list, we want
2045 to put them at the head, rather than the
2046 tail, of the list. */
2048 update_reg_dead_notes (trial, insn);
2049 delay_list.safe_insert (0, trial);
2050 update_block (trial, trial);
2051 delete_related_insns (trial);
2052 if (slots_to_fill == ++slots_filled)
2053 break;
2054 continue;
2058 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2059 if (filter_flags)
2061 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2062 /* If the flags register is set, then it doesn't create RAW
2063 dependencies any longer and it also doesn't create WAW
2064 dependencies since it's dead after the original insn. */
2065 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2067 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2068 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2071 mark_referenced_resources (trial, &needed, true);
2075 /* If all needed slots haven't been filled, we come here. */
2077 /* Try to optimize case of jumping around a single insn. */
2078 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2079 && slots_filled != slots_to_fill
2080 && delay_list.is_empty ()
2081 && JUMP_P (insn)
2082 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2083 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2085 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2086 if (!delay_list.is_empty ())
2087 slots_filled += 1;
2090 /* Try to get insns from beyond the insn needing the delay slot.
2091 These insns can neither set or reference resources set in insns being
2092 skipped, cannot set resources in the insn being skipped, and, if this
2093 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2094 call might not return).
2096 There used to be code which continued past the target label if
2097 we saw all uses of the target label. This code did not work,
2098 because it failed to account for some instructions which were
2099 both annulled and marked as from the target. This can happen as a
2100 result of optimize_skip. Since this code was redundant with
2101 fill_eager_delay_slots anyways, it was just deleted. */
2103 if (slots_filled != slots_to_fill
2104 /* If this instruction could throw an exception which is
2105 caught in the same function, then it's not safe to fill
2106 the delay slot with an instruction from beyond this
2107 point. For example, consider:
2109 int i = 2;
2111 try {
2112 f();
2113 i = 3;
2114 } catch (...) {}
2116 return i;
2118 Even though `i' is a local variable, we must be sure not
2119 to put `i = 3' in the delay slot if `f' might throw an
2120 exception.
2122 Presumably, we should also check to see if we could get
2123 back to this function via `setjmp'. */
2124 && ! can_throw_internal (insn)
2125 && !JUMP_P (insn))
2127 int maybe_never = 0;
2128 rtx pat, trial_delay;
2130 CLEAR_RESOURCE (&needed);
2131 CLEAR_RESOURCE (&set);
2132 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2133 mark_referenced_resources (insn, &needed, true);
2135 if (CALL_P (insn))
2136 maybe_never = 1;
2138 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2139 trial = next_trial)
2141 next_trial = next_nonnote_insn (trial);
2143 /* This must be an INSN or CALL_INSN. */
2144 pat = PATTERN (trial);
2146 /* Stand-alone USE and CLOBBER are just for flow. */
2147 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2148 continue;
2150 /* If this already has filled delay slots, get the insn needing
2151 the delay slots. */
2152 if (GET_CODE (pat) == SEQUENCE)
2153 trial_delay = XVECEXP (pat, 0, 0);
2154 else
2155 trial_delay = trial;
2157 /* Stop our search when seeing a jump. */
2158 if (JUMP_P (trial_delay))
2159 break;
2161 /* See if we have a resource problem before we try to split. */
2162 if (GET_CODE (pat) != SEQUENCE
2163 && ! insn_references_resource_p (trial, &set, true)
2164 && ! insn_sets_resource_p (trial, &set, true)
2165 && ! insn_sets_resource_p (trial, &needed, true)
2166 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2167 && ! (maybe_never && may_trap_or_fault_p (pat))
2168 && (trial = try_split (pat, trial, 0))
2169 && eligible_for_delay (insn, slots_filled, trial, flags)
2170 && ! can_throw_internal (trial))
2172 next_trial = next_nonnote_insn (trial);
2173 add_to_delay_list (trial, &delay_list);
2174 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2175 link_cc0_insns (trial);
2177 delete_related_insns (trial);
2178 if (slots_to_fill == ++slots_filled)
2179 break;
2180 continue;
2183 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2184 mark_referenced_resources (trial, &needed, true);
2186 /* Ensure we don't put insns between the setting of cc and the
2187 comparison by moving a setting of cc into an earlier delay
2188 slot since these insns could clobber the condition code. */
2189 set.cc = 1;
2191 /* If this is a call, we might not get here. */
2192 if (CALL_P (trial_delay))
2193 maybe_never = 1;
2196 /* If there are slots left to fill and our search was stopped by an
2197 unconditional branch, try the insn at the branch target. We can
2198 redirect the branch if it works.
2200 Don't do this if the insn at the branch target is a branch. */
2201 if (slots_to_fill != slots_filled
2202 && trial
2203 && jump_to_label_p (trial)
2204 && simplejump_p (trial)
2205 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2206 && ! (NONJUMP_INSN_P (next_trial)
2207 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2208 && !JUMP_P (next_trial)
2209 && ! insn_references_resource_p (next_trial, &set, true)
2210 && ! insn_sets_resource_p (next_trial, &set, true)
2211 && ! insn_sets_resource_p (next_trial, &needed, true)
2212 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2213 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2214 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2215 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2216 && ! can_throw_internal (trial))
2218 /* See comment in relax_delay_slots about necessity of using
2219 next_real_insn here. */
2220 rtx_insn *new_label = next_real_insn (next_trial);
2222 if (new_label != 0)
2223 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2224 else
2225 new_label = find_end_label (simple_return_rtx);
2227 if (new_label)
2229 add_to_delay_list (copy_delay_slot_insn (next_trial),
2230 &delay_list);
2231 slots_filled++;
2232 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2233 new_label);
2238 /* If this is an unconditional jump, then try to get insns from the
2239 target of the jump. */
2240 rtx_jump_insn *jump_insn;
2241 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2242 && simplejump_p (jump_insn)
2243 && slots_filled != slots_to_fill)
2244 fill_slots_from_thread (jump_insn, const_true_rtx,
2245 next_active_insn (JUMP_LABEL (insn)), NULL, 1,
2246 1, own_thread_p (JUMP_LABEL (insn),
2247 JUMP_LABEL (insn), 0),
2248 slots_to_fill, &slots_filled, &delay_list);
2250 if (!delay_list.is_empty ())
2251 unfilled_slots_base[i]
2252 = emit_delay_sequence (insn, delay_list, slots_filled);
2254 if (slots_to_fill == slots_filled)
2255 unfilled_slots_base[i] = 0;
2257 note_delay_statistics (slots_filled, 0);
2261 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2262 return the ultimate label reached by any such chain of jumps.
2263 Return a suitable return rtx if the chain ultimately leads to a
2264 return instruction.
2265 If LABEL is not followed by a jump, return LABEL.
2266 If the chain loops or we can't find end, return LABEL,
2267 since that tells caller to avoid changing the insn.
2268 If the returned label is obtained by following a crossing jump,
2269 set *CROSSING to true, otherwise set it to false. */
2271 static rtx
2272 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2274 rtx_insn *insn;
2275 rtx_insn *next;
2276 int depth;
2278 *crossing = false;
2279 if (ANY_RETURN_P (label))
2280 return label;
2282 rtx_insn *value = as_a <rtx_insn *> (label);
2284 for (depth = 0;
2285 (depth < 10
2286 && (insn = next_active_insn (value)) != 0
2287 && JUMP_P (insn)
2288 && JUMP_LABEL (insn) != NULL_RTX
2289 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2290 || ANY_RETURN_P (PATTERN (insn)))
2291 && (next = NEXT_INSN (insn))
2292 && BARRIER_P (next));
2293 depth++)
2295 rtx this_label_or_return = JUMP_LABEL (insn);
2297 /* If we have found a cycle, make the insn jump to itself. */
2298 if (this_label_or_return == label)
2299 return label;
2301 /* Cannot follow returns and cannot look through tablejumps. */
2302 if (ANY_RETURN_P (this_label_or_return))
2303 return this_label_or_return;
2305 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2306 if (NEXT_INSN (this_label)
2307 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2308 break;
2310 if (!targetm.can_follow_jump (jump, insn))
2311 break;
2312 if (!*crossing)
2313 *crossing = CROSSING_JUMP_P (jump);
2314 value = this_label;
2316 if (depth == 10)
2317 return label;
2318 return value;
2321 /* Try to find insns to place in delay slots.
2323 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2324 or is an unconditional branch if CONDITION is const_true_rtx.
2325 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2327 THREAD is a flow-of-control, either the insns to be executed if the
2328 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2330 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2331 to see if any potential delay slot insns set things needed there.
2333 LIKELY is nonzero if it is extremely likely that the branch will be
2334 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2335 end of a loop back up to the top.
2337 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2338 thread. I.e., it is the fallthrough code of our jump or the target of the
2339 jump when we are the only jump going there.
2341 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2342 case, we can only take insns from the head of the thread for our delay
2343 slot. We then adjust the jump to point after the insns we have taken. */
2345 static void
2346 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2347 rtx thread_or_return, rtx opposite_thread, int likely,
2348 int thread_if_true, int own_thread, int slots_to_fill,
2349 int *pslots_filled, vec<rtx_insn *> *delay_list)
2351 rtx new_thread;
2352 struct resources opposite_needed, set, needed;
2353 rtx_insn *trial;
2354 int lose = 0;
2355 int must_annul = 0;
2356 int flags;
2358 /* Validate our arguments. */
2359 gcc_assert (condition != const_true_rtx || thread_if_true);
2360 gcc_assert (own_thread || thread_if_true);
2362 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2364 /* If our thread is the end of subroutine, we can't get any delay
2365 insns from that. */
2366 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2367 return;
2369 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2371 /* If this is an unconditional branch, nothing is needed at the
2372 opposite thread. Otherwise, compute what is needed there. */
2373 if (condition == const_true_rtx)
2374 CLEAR_RESOURCE (&opposite_needed);
2375 else
2376 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2378 /* If the insn at THREAD can be split, do it here to avoid having to
2379 update THREAD and NEW_THREAD if it is done in the loop below. Also
2380 initialize NEW_THREAD. */
2382 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2384 /* Scan insns at THREAD. We are looking for an insn that can be removed
2385 from THREAD (it neither sets nor references resources that were set
2386 ahead of it and it doesn't set anything needs by the insns ahead of
2387 it) and that either can be placed in an annulling insn or aren't
2388 needed at OPPOSITE_THREAD. */
2390 CLEAR_RESOURCE (&needed);
2391 CLEAR_RESOURCE (&set);
2393 /* If we do not own this thread, we must stop as soon as we find
2394 something that we can't put in a delay slot, since all we can do
2395 is branch into THREAD at a later point. Therefore, labels stop
2396 the search if this is not the `true' thread. */
2398 for (trial = thread;
2399 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2400 trial = next_nonnote_insn (trial))
2402 rtx pat, old_trial;
2404 /* If we have passed a label, we no longer own this thread. */
2405 if (LABEL_P (trial))
2407 own_thread = 0;
2408 continue;
2411 pat = PATTERN (trial);
2412 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2413 continue;
2415 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2416 don't separate or copy insns that set and use CC0. */
2417 if (! insn_references_resource_p (trial, &set, true)
2418 && ! insn_sets_resource_p (trial, &set, true)
2419 && ! insn_sets_resource_p (trial, &needed, true)
2420 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2421 && (! own_thread || ! sets_cc0_p (pat)))))
2422 && ! can_throw_internal (trial))
2424 rtx prior_insn;
2426 /* If TRIAL is redundant with some insn before INSN, we don't
2427 actually need to add it to the delay list; we can merely pretend
2428 we did. */
2429 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2431 fix_reg_dead_note (prior_insn, insn);
2432 if (own_thread)
2434 update_block (trial, thread);
2435 if (trial == thread)
2437 thread = next_active_insn (thread);
2438 if (new_thread == trial)
2439 new_thread = thread;
2442 delete_related_insns (trial);
2444 else
2446 update_reg_unused_notes (prior_insn, trial);
2447 new_thread = next_active_insn (trial);
2450 continue;
2453 /* There are two ways we can win: If TRIAL doesn't set anything
2454 needed at the opposite thread and can't trap, or if it can
2455 go into an annulled delay slot. But we want neither to copy
2456 nor to speculate frame-related insns. */
2457 if (!must_annul
2458 && ((condition == const_true_rtx
2459 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2460 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2461 && ! may_trap_or_fault_p (pat)
2462 && ! RTX_FRAME_RELATED_P (trial))))
2464 old_trial = trial;
2465 trial = try_split (pat, trial, 0);
2466 if (new_thread == old_trial)
2467 new_thread = trial;
2468 if (thread == old_trial)
2469 thread = trial;
2470 pat = PATTERN (trial);
2471 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2472 goto winner;
2474 else if (!RTX_FRAME_RELATED_P (trial)
2475 && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2476 || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2478 old_trial = trial;
2479 trial = try_split (pat, trial, 0);
2480 if (new_thread == old_trial)
2481 new_thread = trial;
2482 if (thread == old_trial)
2483 thread = trial;
2484 pat = PATTERN (trial);
2485 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2486 ? check_annul_list_true_false (0, *delay_list)
2487 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2488 : check_annul_list_true_false (1, *delay_list)
2489 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2491 rtx_insn *temp;
2493 must_annul = 1;
2494 winner:
2496 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2497 link_cc0_insns (trial);
2499 /* If we own this thread, delete the insn. If this is the
2500 destination of a branch, show that a basic block status
2501 may have been updated. In any case, mark the new
2502 starting point of this thread. */
2503 if (own_thread)
2505 rtx note;
2507 update_block (trial, thread);
2508 if (trial == thread)
2510 thread = next_active_insn (thread);
2511 if (new_thread == trial)
2512 new_thread = thread;
2515 /* We are moving this insn, not deleting it. We must
2516 temporarily increment the use count on any referenced
2517 label lest it be deleted by delete_related_insns. */
2518 for (note = REG_NOTES (trial);
2519 note != NULL_RTX;
2520 note = XEXP (note, 1))
2521 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2522 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2524 /* REG_LABEL_OPERAND could be
2525 NOTE_INSN_DELETED_LABEL too. */
2526 if (LABEL_P (XEXP (note, 0)))
2527 LABEL_NUSES (XEXP (note, 0))++;
2528 else
2529 gcc_assert (REG_NOTE_KIND (note)
2530 == REG_LABEL_OPERAND);
2532 if (jump_to_label_p (trial))
2533 LABEL_NUSES (JUMP_LABEL (trial))++;
2535 delete_related_insns (trial);
2537 for (note = REG_NOTES (trial);
2538 note != NULL_RTX;
2539 note = XEXP (note, 1))
2540 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2541 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2543 /* REG_LABEL_OPERAND could be
2544 NOTE_INSN_DELETED_LABEL too. */
2545 if (LABEL_P (XEXP (note, 0)))
2546 LABEL_NUSES (XEXP (note, 0))--;
2547 else
2548 gcc_assert (REG_NOTE_KIND (note)
2549 == REG_LABEL_OPERAND);
2551 if (jump_to_label_p (trial))
2552 LABEL_NUSES (JUMP_LABEL (trial))--;
2554 else
2555 new_thread = next_active_insn (trial);
2557 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2558 if (thread_if_true)
2559 INSN_FROM_TARGET_P (temp) = 1;
2561 add_to_delay_list (temp, delay_list);
2563 if (slots_to_fill == ++(*pslots_filled))
2565 /* Even though we have filled all the slots, we
2566 may be branching to a location that has a
2567 redundant insn. Skip any if so. */
2568 while (new_thread && ! own_thread
2569 && ! insn_sets_resource_p (new_thread, &set, true)
2570 && ! insn_sets_resource_p (new_thread, &needed,
2571 true)
2572 && ! insn_references_resource_p (new_thread,
2573 &set, true)
2574 && (prior_insn
2575 = redundant_insn (new_thread, insn,
2576 *delay_list)))
2578 /* We know we do not own the thread, so no need
2579 to call update_block and delete_insn. */
2580 fix_reg_dead_note (prior_insn, insn);
2581 update_reg_unused_notes (prior_insn, new_thread);
2582 new_thread = next_active_insn (new_thread);
2584 break;
2587 continue;
2592 /* This insn can't go into a delay slot. */
2593 lose = 1;
2594 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2595 mark_referenced_resources (trial, &needed, true);
2597 /* Ensure we don't put insns between the setting of cc and the comparison
2598 by moving a setting of cc into an earlier delay slot since these insns
2599 could clobber the condition code. */
2600 set.cc = 1;
2602 /* If this insn is a register-register copy and the next insn has
2603 a use of our destination, change it to use our source. That way,
2604 it will become a candidate for our delay slot the next time
2605 through this loop. This case occurs commonly in loops that
2606 scan a list.
2608 We could check for more complex cases than those tested below,
2609 but it doesn't seem worth it. It might also be a good idea to try
2610 to swap the two insns. That might do better.
2612 We can't do this if the next insn modifies our destination, because
2613 that would make the replacement into the insn invalid. We also can't
2614 do this if it modifies our source, because it might be an earlyclobber
2615 operand. This latter test also prevents updating the contents of
2616 a PRE_INC. We also can't do this if there's overlap of source and
2617 destination. Overlap may happen for larger-than-register-size modes. */
2619 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2620 && REG_P (SET_SRC (pat))
2621 && REG_P (SET_DEST (pat))
2622 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2624 rtx_insn *next = next_nonnote_insn (trial);
2626 if (next && NONJUMP_INSN_P (next)
2627 && GET_CODE (PATTERN (next)) != USE
2628 && ! reg_set_p (SET_DEST (pat), next)
2629 && ! reg_set_p (SET_SRC (pat), next)
2630 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2631 && ! modified_in_p (SET_DEST (pat), next))
2632 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2636 /* If we stopped on a branch insn that has delay slots, see if we can
2637 steal some of the insns in those slots. */
2638 if (trial && NONJUMP_INSN_P (trial)
2639 && GET_CODE (PATTERN (trial)) == SEQUENCE
2640 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2642 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2643 /* If this is the `true' thread, we will want to follow the jump,
2644 so we can only do this if we have taken everything up to here. */
2645 if (thread_if_true && trial == new_thread)
2647 steal_delay_list_from_target (insn, condition, sequence,
2648 delay_list, &set, &needed,
2649 &opposite_needed, slots_to_fill,
2650 pslots_filled, &must_annul,
2651 &new_thread);
2652 /* If we owned the thread and are told that it branched
2653 elsewhere, make sure we own the thread at the new location. */
2654 if (own_thread && trial != new_thread)
2655 own_thread = own_thread_p (new_thread, new_thread, 0);
2657 else if (! thread_if_true)
2658 steal_delay_list_from_fallthrough (insn, condition, sequence,
2659 delay_list, &set, &needed,
2660 &opposite_needed, slots_to_fill,
2661 pslots_filled, &must_annul);
2664 /* If we haven't found anything for this delay slot and it is very
2665 likely that the branch will be taken, see if the insn at our target
2666 increments or decrements a register with an increment that does not
2667 depend on the destination register. If so, try to place the opposite
2668 arithmetic insn after the jump insn and put the arithmetic insn in the
2669 delay slot. If we can't do this, return. */
2670 if (delay_list->is_empty () && likely
2671 && new_thread && !ANY_RETURN_P (new_thread)
2672 && NONJUMP_INSN_P (new_thread)
2673 && !RTX_FRAME_RELATED_P (new_thread)
2674 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2675 && asm_noperands (PATTERN (new_thread)) < 0)
2677 rtx pat = PATTERN (new_thread);
2678 rtx dest;
2679 rtx src;
2681 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2682 above. */
2683 trial = as_a <rtx_insn *> (new_thread);
2684 pat = PATTERN (trial);
2686 if (!NONJUMP_INSN_P (trial)
2687 || GET_CODE (pat) != SET
2688 || ! eligible_for_delay (insn, 0, trial, flags)
2689 || can_throw_internal (trial))
2690 return;
2692 dest = SET_DEST (pat), src = SET_SRC (pat);
2693 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2694 && rtx_equal_p (XEXP (src, 0), dest)
2695 && (!FLOAT_MODE_P (GET_MODE (src))
2696 || flag_unsafe_math_optimizations)
2697 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2698 && ! side_effects_p (pat))
2700 rtx other = XEXP (src, 1);
2701 rtx new_arith;
2702 rtx_insn *ninsn;
2704 /* If this is a constant adjustment, use the same code with
2705 the negated constant. Otherwise, reverse the sense of the
2706 arithmetic. */
2707 if (CONST_INT_P (other))
2708 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2709 negate_rtx (GET_MODE (src), other));
2710 else
2711 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2712 GET_MODE (src), dest, other);
2714 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2716 if (recog_memoized (ninsn) < 0
2717 || (extract_insn (ninsn),
2718 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2720 delete_related_insns (ninsn);
2721 return;
2724 if (own_thread)
2726 update_block (trial, thread);
2727 if (trial == thread)
2729 thread = next_active_insn (thread);
2730 if (new_thread == trial)
2731 new_thread = thread;
2733 delete_related_insns (trial);
2735 else
2736 new_thread = next_active_insn (trial);
2738 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2739 if (thread_if_true)
2740 INSN_FROM_TARGET_P (ninsn) = 1;
2742 add_to_delay_list (ninsn, delay_list);
2743 (*pslots_filled)++;
2747 if (!delay_list->is_empty () && must_annul)
2748 INSN_ANNULLED_BRANCH_P (insn) = 1;
2750 /* If we are to branch into the middle of this thread, find an appropriate
2751 label or make a new one if none, and redirect INSN to it. If we hit the
2752 end of the function, use the end-of-function label. */
2753 if (new_thread != thread)
2755 rtx label;
2756 bool crossing = false;
2758 gcc_assert (thread_if_true);
2760 if (new_thread && simplejump_or_return_p (new_thread)
2761 && redirect_with_delay_list_safe_p (insn,
2762 JUMP_LABEL (new_thread),
2763 *delay_list))
2764 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2765 &crossing);
2767 if (ANY_RETURN_P (new_thread))
2768 label = find_end_label (new_thread);
2769 else if (LABEL_P (new_thread))
2770 label = new_thread;
2771 else
2772 label = get_label_before (as_a <rtx_insn *> (new_thread),
2773 JUMP_LABEL (insn));
2775 if (label)
2777 reorg_redirect_jump (insn, label);
2778 if (crossing)
2779 CROSSING_JUMP_P (insn) = 1;
2784 /* Make another attempt to find insns to place in delay slots.
2786 We previously looked for insns located in front of the delay insn
2787 and, for non-jump delay insns, located behind the delay insn.
2789 Here only try to schedule jump insns and try to move insns from either
2790 the target or the following insns into the delay slot. If annulling is
2791 supported, we will be likely to do this. Otherwise, we can do this only
2792 if safe. */
2794 static void
2795 fill_eager_delay_slots (void)
2797 rtx_insn *insn;
2798 int i;
2799 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2801 for (i = 0; i < num_unfilled_slots; i++)
2803 rtx condition;
2804 rtx target_label, insn_at_target;
2805 rtx_insn *fallthrough_insn;
2806 auto_vec<rtx_insn *, 5> delay_list;
2807 rtx_jump_insn *jump_insn;
2808 int own_target;
2809 int own_fallthrough;
2810 int prediction, slots_to_fill, slots_filled;
2812 insn = unfilled_slots_base[i];
2813 if (insn == 0
2814 || insn->deleted ()
2815 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2816 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2817 continue;
2819 slots_to_fill = num_delay_slots (jump_insn);
2820 /* Some machine description have defined instructions to have
2821 delay slots only in certain circumstances which may depend on
2822 nearby insns (which change due to reorg's actions).
2824 For example, the PA port normally has delay slots for unconditional
2825 jumps.
2827 However, the PA port claims such jumps do not have a delay slot
2828 if they are immediate successors of certain CALL_INSNs. This
2829 allows the port to favor filling the delay slot of the call with
2830 the unconditional jump. */
2831 if (slots_to_fill == 0)
2832 continue;
2834 slots_filled = 0;
2835 target_label = JUMP_LABEL (jump_insn);
2836 condition = get_branch_condition (jump_insn, target_label);
2838 if (condition == 0)
2839 continue;
2841 /* Get the next active fallthrough and target insns and see if we own
2842 them. Then see whether the branch is likely true. We don't need
2843 to do a lot of this for unconditional branches. */
2845 insn_at_target = first_active_target_insn (target_label);
2846 own_target = own_thread_p (target_label, target_label, 0);
2848 if (condition == const_true_rtx)
2850 own_fallthrough = 0;
2851 fallthrough_insn = 0;
2852 prediction = 2;
2854 else
2856 fallthrough_insn = next_active_insn (jump_insn);
2857 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2858 prediction = mostly_true_jump (jump_insn);
2861 /* If this insn is expected to branch, first try to get insns from our
2862 target, then our fallthrough insns. If it is not expected to branch,
2863 try the other order. */
2865 if (prediction > 0)
2867 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2868 fallthrough_insn, prediction == 2, 1,
2869 own_target,
2870 slots_to_fill, &slots_filled, &delay_list);
2872 if (delay_list.is_empty () && own_fallthrough)
2874 /* Even though we didn't find anything for delay slots,
2875 we might have found a redundant insn which we deleted
2876 from the thread that was filled. So we have to recompute
2877 the next insn at the target. */
2878 target_label = JUMP_LABEL (jump_insn);
2879 insn_at_target = first_active_target_insn (target_label);
2881 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2882 insn_at_target, 0, 0, own_fallthrough,
2883 slots_to_fill, &slots_filled,
2884 &delay_list);
2887 else
2889 if (own_fallthrough)
2890 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2891 insn_at_target, 0, 0, own_fallthrough,
2892 slots_to_fill, &slots_filled, &delay_list);
2894 if (delay_list.is_empty ())
2895 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2896 next_active_insn (insn), 0, 1, own_target,
2897 slots_to_fill, &slots_filled, &delay_list);
2900 if (!delay_list.is_empty ())
2901 unfilled_slots_base[i]
2902 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2904 if (slots_to_fill == slots_filled)
2905 unfilled_slots_base[i] = 0;
2907 note_delay_statistics (slots_filled, 1);
2911 static void delete_computation (rtx insn);
2913 /* Recursively delete prior insns that compute the value (used only by INSN
2914 which the caller is deleting) stored in the register mentioned by NOTE
2915 which is a REG_DEAD note associated with INSN. */
2917 static void
2918 delete_prior_computation (rtx note, rtx insn)
2920 rtx our_prev;
2921 rtx reg = XEXP (note, 0);
2923 for (our_prev = prev_nonnote_insn (insn);
2924 our_prev && (NONJUMP_INSN_P (our_prev)
2925 || CALL_P (our_prev));
2926 our_prev = prev_nonnote_insn (our_prev))
2928 rtx pat = PATTERN (our_prev);
2930 /* If we reach a CALL which is not calling a const function
2931 or the callee pops the arguments, then give up. */
2932 if (CALL_P (our_prev)
2933 && (! RTL_CONST_CALL_P (our_prev)
2934 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2935 break;
2937 /* If we reach a SEQUENCE, it is too complex to try to
2938 do anything with it, so give up. We can be run during
2939 and after reorg, so SEQUENCE rtl can legitimately show
2940 up here. */
2941 if (GET_CODE (pat) == SEQUENCE)
2942 break;
2944 if (GET_CODE (pat) == USE
2945 && NONJUMP_INSN_P (XEXP (pat, 0)))
2946 /* reorg creates USEs that look like this. We leave them
2947 alone because reorg needs them for its own purposes. */
2948 break;
2950 if (reg_set_p (reg, pat))
2952 if (side_effects_p (pat) && !CALL_P (our_prev))
2953 break;
2955 if (GET_CODE (pat) == PARALLEL)
2957 /* If we find a SET of something else, we can't
2958 delete the insn. */
2960 int i;
2962 for (i = 0; i < XVECLEN (pat, 0); i++)
2964 rtx part = XVECEXP (pat, 0, i);
2966 if (GET_CODE (part) == SET
2967 && SET_DEST (part) != reg)
2968 break;
2971 if (i == XVECLEN (pat, 0))
2972 delete_computation (our_prev);
2974 else if (GET_CODE (pat) == SET
2975 && REG_P (SET_DEST (pat)))
2977 int dest_regno = REGNO (SET_DEST (pat));
2978 int dest_endregno = END_REGNO (SET_DEST (pat));
2979 int regno = REGNO (reg);
2980 int endregno = END_REGNO (reg);
2982 if (dest_regno >= regno
2983 && dest_endregno <= endregno)
2984 delete_computation (our_prev);
2986 /* We may have a multi-word hard register and some, but not
2987 all, of the words of the register are needed in subsequent
2988 insns. Write REG_UNUSED notes for those parts that were not
2989 needed. */
2990 else if (dest_regno <= regno
2991 && dest_endregno >= endregno)
2993 int i;
2995 add_reg_note (our_prev, REG_UNUSED, reg);
2997 for (i = dest_regno; i < dest_endregno; i++)
2998 if (! find_regno_note (our_prev, REG_UNUSED, i))
2999 break;
3001 if (i == dest_endregno)
3002 delete_computation (our_prev);
3006 break;
3009 /* If PAT references the register that dies here, it is an
3010 additional use. Hence any prior SET isn't dead. However, this
3011 insn becomes the new place for the REG_DEAD note. */
3012 if (reg_overlap_mentioned_p (reg, pat))
3014 XEXP (note, 1) = REG_NOTES (our_prev);
3015 REG_NOTES (our_prev) = note;
3016 break;
3021 /* Delete INSN and recursively delete insns that compute values used only
3022 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3024 Look at all our REG_DEAD notes. If a previous insn does nothing other
3025 than set a register that dies in this insn, we can delete that insn
3026 as well.
3028 On machines with CC0, if CC0 is used in this insn, we may be able to
3029 delete the insn that set it. */
3031 static void
3032 delete_computation (rtx insn)
3034 rtx note, next;
3036 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3038 rtx_insn *prev = prev_nonnote_insn (insn);
3039 /* We assume that at this stage
3040 CC's are always set explicitly
3041 and always immediately before the jump that
3042 will use them. So if the previous insn
3043 exists to set the CC's, delete it
3044 (unless it performs auto-increments, etc.). */
3045 if (prev && NONJUMP_INSN_P (prev)
3046 && sets_cc0_p (PATTERN (prev)))
3048 if (sets_cc0_p (PATTERN (prev)) > 0
3049 && ! side_effects_p (PATTERN (prev)))
3050 delete_computation (prev);
3051 else
3052 /* Otherwise, show that cc0 won't be used. */
3053 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3057 for (note = REG_NOTES (insn); note; note = next)
3059 next = XEXP (note, 1);
3061 if (REG_NOTE_KIND (note) != REG_DEAD
3062 /* Verify that the REG_NOTE is legitimate. */
3063 || !REG_P (XEXP (note, 0)))
3064 continue;
3066 delete_prior_computation (note, insn);
3069 delete_related_insns (insn);
3072 /* If all INSN does is set the pc, delete it,
3073 and delete the insn that set the condition codes for it
3074 if that's what the previous thing was. */
3076 static void
3077 delete_jump (rtx_insn *insn)
3079 rtx set = single_set (insn);
3081 if (set && GET_CODE (SET_DEST (set)) == PC)
3082 delete_computation (insn);
3085 static rtx_insn *
3086 label_before_next_insn (rtx x, rtx scan_limit)
3088 rtx_insn *insn = next_active_insn (x);
3089 while (insn)
3091 insn = PREV_INSN (insn);
3092 if (insn == scan_limit || insn == NULL_RTX)
3093 return NULL;
3094 if (LABEL_P (insn))
3095 break;
3097 return insn;
3100 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3101 BEG and END. */
3103 static bool
3104 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3106 const rtx_insn *p;
3107 for (p = beg; p != end; p = NEXT_INSN (p))
3108 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3109 return true;
3110 return false;
3114 /* Once we have tried two ways to fill a delay slot, make a pass over the
3115 code to try to improve the results and to do such things as more jump
3116 threading. */
3118 static void
3119 relax_delay_slots (rtx_insn *first)
3121 rtx_insn *insn, *next;
3122 rtx_sequence *pat;
3123 rtx trial;
3124 rtx_insn *delay_insn;
3125 rtx target_label;
3127 /* Look at every JUMP_INSN and see if we can improve it. */
3128 for (insn = first; insn; insn = next)
3130 rtx_insn *other;
3131 bool crossing;
3133 next = next_active_insn (insn);
3135 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3136 the next insn, or jumps to a label that is not the last of a
3137 group of consecutive labels. */
3138 if (is_a <rtx_jump_insn *> (insn)
3139 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3140 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3142 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3143 target_label
3144 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3145 &crossing));
3146 if (ANY_RETURN_P (target_label))
3147 target_label = find_end_label (target_label);
3149 if (target_label && next_active_insn (target_label) == next
3150 && ! condjump_in_parallel_p (jump_insn)
3151 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3153 delete_jump (jump_insn);
3154 continue;
3157 if (target_label && target_label != JUMP_LABEL (jump_insn))
3159 reorg_redirect_jump (jump_insn, target_label);
3160 if (crossing)
3161 CROSSING_JUMP_P (jump_insn) = 1;
3164 /* See if this jump conditionally branches around an unconditional
3165 jump. If so, invert this jump and point it to the target of the
3166 second jump. Check if it's possible on the target. */
3167 if (next && simplejump_or_return_p (next)
3168 && any_condjump_p (jump_insn)
3169 && target_label
3170 && next_active_insn (target_label) == next_active_insn (next)
3171 && no_labels_between_p (jump_insn, next)
3172 && targetm.can_follow_jump (jump_insn, next))
3174 rtx label = JUMP_LABEL (next);
3176 /* Be careful how we do this to avoid deleting code or
3177 labels that are momentarily dead. See similar optimization
3178 in jump.c.
3180 We also need to ensure we properly handle the case when
3181 invert_jump fails. */
3183 ++LABEL_NUSES (target_label);
3184 if (!ANY_RETURN_P (label))
3185 ++LABEL_NUSES (label);
3187 if (invert_jump (jump_insn, label, 1))
3189 delete_related_insns (next);
3190 next = jump_insn;
3193 if (!ANY_RETURN_P (label))
3194 --LABEL_NUSES (label);
3196 if (--LABEL_NUSES (target_label) == 0)
3197 delete_related_insns (target_label);
3199 continue;
3203 /* If this is an unconditional jump and the previous insn is a
3204 conditional jump, try reversing the condition of the previous
3205 insn and swapping our targets. The next pass might be able to
3206 fill the slots.
3208 Don't do this if we expect the conditional branch to be true, because
3209 we would then be making the more common case longer. */
3211 if (simplejump_or_return_p (insn)
3212 && (other = prev_active_insn (insn)) != 0
3213 && any_condjump_p (other)
3214 && no_labels_between_p (other, insn)
3215 && 0 > mostly_true_jump (other))
3217 rtx other_target = JUMP_LABEL (other);
3218 target_label = JUMP_LABEL (insn);
3220 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3221 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3224 /* Now look only at cases where we have a filled delay slot. */
3225 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3226 continue;
3228 pat = as_a <rtx_sequence *> (PATTERN (insn));
3229 delay_insn = pat->insn (0);
3231 /* See if the first insn in the delay slot is redundant with some
3232 previous insn. Remove it from the delay slot if so; then set up
3233 to reprocess this insn. */
3234 if (redundant_insn (pat->insn (1), delay_insn, vNULL))
3236 update_block (pat->insn (1), insn);
3237 delete_from_delay_slot (pat->insn (1));
3238 next = prev_active_insn (next);
3239 continue;
3242 /* See if we have a RETURN insn with a filled delay slot followed
3243 by a RETURN insn with an unfilled a delay slot. If so, we can delete
3244 the first RETURN (but not its delay insn). This gives the same
3245 effect in fewer instructions.
3247 Only do so if optimizing for size since this results in slower, but
3248 smaller code. */
3249 if (optimize_function_for_size_p (cfun)
3250 && ANY_RETURN_P (PATTERN (delay_insn))
3251 && next
3252 && JUMP_P (next)
3253 && PATTERN (next) == PATTERN (delay_insn))
3255 rtx_insn *after;
3256 int i;
3258 /* Delete the RETURN and just execute the delay list insns.
3260 We do this by deleting the INSN containing the SEQUENCE, then
3261 re-emitting the insns separately, and then deleting the RETURN.
3262 This allows the count of the jump target to be properly
3263 decremented.
3265 Note that we need to change the INSN_UID of the re-emitted insns
3266 since it is used to hash the insns for mark_target_live_regs and
3267 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3269 Clear the from target bit, since these insns are no longer
3270 in delay slots. */
3271 for (i = 0; i < XVECLEN (pat, 0); i++)
3272 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3274 trial = PREV_INSN (insn);
3275 delete_related_insns (insn);
3276 gcc_assert (GET_CODE (pat) == SEQUENCE);
3277 add_insn_after (delay_insn, trial, NULL);
3278 after = delay_insn;
3279 for (i = 1; i < pat->len (); i++)
3280 after = emit_copy_of_insn_after (pat->insn (i), after);
3281 delete_scheduled_jump (delay_insn);
3282 continue;
3285 /* Now look only at the cases where we have a filled JUMP_INSN. */
3286 rtx_jump_insn *delay_jump_insn =
3287 dyn_cast <rtx_jump_insn *> (delay_insn);
3288 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3289 || condjump_in_parallel_p (delay_jump_insn)))
3290 continue;
3292 target_label = JUMP_LABEL (delay_jump_insn);
3293 if (target_label && ANY_RETURN_P (target_label))
3294 continue;
3296 /* If this jump goes to another unconditional jump, thread it, but
3297 don't convert a jump into a RETURN here. */
3298 trial = skip_consecutive_labels (follow_jumps (target_label,
3299 delay_jump_insn,
3300 &crossing));
3301 if (ANY_RETURN_P (trial))
3302 trial = find_end_label (trial);
3304 if (trial && trial != target_label
3305 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3307 reorg_redirect_jump (delay_jump_insn, trial);
3308 target_label = trial;
3309 if (crossing)
3310 CROSSING_JUMP_P (insn) = 1;
3313 /* If the first insn at TARGET_LABEL is redundant with a previous
3314 insn, redirect the jump to the following insn and process again.
3315 We use next_real_insn instead of next_active_insn so we
3316 don't skip USE-markers, or we'll end up with incorrect
3317 liveness info. */
3318 trial = next_real_insn (target_label);
3319 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3320 && redundant_insn (trial, insn, vNULL)
3321 && ! can_throw_internal (trial))
3323 /* Figure out where to emit the special USE insn so we don't
3324 later incorrectly compute register live/death info. */
3325 rtx_insn *tmp = next_active_insn (trial);
3326 if (tmp == 0)
3327 tmp = find_end_label (simple_return_rtx);
3329 if (tmp)
3331 /* Insert the special USE insn and update dataflow info.
3332 We know "trial" is an insn here as it is the output of
3333 next_real_insn () above. */
3334 update_block (as_a <rtx_insn *> (trial), tmp);
3336 /* Now emit a label before the special USE insn, and
3337 redirect our jump to the new label. */
3338 target_label = get_label_before (PREV_INSN (tmp), target_label);
3339 reorg_redirect_jump (delay_jump_insn, target_label);
3340 next = insn;
3341 continue;
3345 /* Similarly, if it is an unconditional jump with one insn in its
3346 delay list and that insn is redundant, thread the jump. */
3347 rtx_sequence *trial_seq =
3348 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3349 if (trial_seq
3350 && trial_seq->len () == 2
3351 && JUMP_P (trial_seq->insn (0))
3352 && simplejump_or_return_p (trial_seq->insn (0))
3353 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3355 target_label = JUMP_LABEL (trial_seq->insn (0));
3356 if (ANY_RETURN_P (target_label))
3357 target_label = find_end_label (target_label);
3359 if (target_label
3360 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3361 target_label, insn))
3363 update_block (trial_seq->insn (1), insn);
3364 reorg_redirect_jump (delay_jump_insn, target_label);
3365 next = insn;
3366 continue;
3370 /* See if we have a simple (conditional) jump that is useless. */
3371 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3372 && ! condjump_in_parallel_p (delay_jump_insn)
3373 && prev_active_insn (target_label) == insn
3374 && ! BARRIER_P (prev_nonnote_insn (target_label))
3375 /* If the last insn in the delay slot sets CC0 for some insn,
3376 various code assumes that it is in a delay slot. We could
3377 put it back where it belonged and delete the register notes,
3378 but it doesn't seem worthwhile in this uncommon case. */
3379 && (!HAVE_cc0
3380 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3381 REG_CC_USER, NULL_RTX)))
3383 rtx_insn *after;
3384 int i;
3386 /* All this insn does is execute its delay list and jump to the
3387 following insn. So delete the jump and just execute the delay
3388 list insns.
3390 We do this by deleting the INSN containing the SEQUENCE, then
3391 re-emitting the insns separately, and then deleting the jump.
3392 This allows the count of the jump target to be properly
3393 decremented.
3395 Note that we need to change the INSN_UID of the re-emitted insns
3396 since it is used to hash the insns for mark_target_live_regs and
3397 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3399 Clear the from target bit, since these insns are no longer
3400 in delay slots. */
3401 for (i = 0; i < XVECLEN (pat, 0); i++)
3402 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3404 trial = PREV_INSN (insn);
3405 delete_related_insns (insn);
3406 gcc_assert (GET_CODE (pat) == SEQUENCE);
3407 add_insn_after (delay_jump_insn, trial, NULL);
3408 after = delay_jump_insn;
3409 for (i = 1; i < pat->len (); i++)
3410 after = emit_copy_of_insn_after (pat->insn (i), after);
3411 delete_scheduled_jump (delay_jump_insn);
3412 continue;
3415 /* See if this is an unconditional jump around a single insn which is
3416 identical to the one in its delay slot. In this case, we can just
3417 delete the branch and the insn in its delay slot. */
3418 if (next && NONJUMP_INSN_P (next)
3419 && label_before_next_insn (next, insn) == target_label
3420 && simplejump_p (insn)
3421 && XVECLEN (pat, 0) == 2
3422 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3424 delete_related_insns (insn);
3425 continue;
3428 /* See if this jump (with its delay slots) conditionally branches
3429 around an unconditional jump (without delay slots). If so, invert
3430 this jump and point it to the target of the second jump. We cannot
3431 do this for annulled jumps, though. Again, don't convert a jump to
3432 a RETURN here. */
3433 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3434 && any_condjump_p (delay_jump_insn)
3435 && next && simplejump_or_return_p (next)
3436 && next_active_insn (target_label) == next_active_insn (next)
3437 && no_labels_between_p (insn, next))
3439 rtx label = JUMP_LABEL (next);
3440 rtx old_label = JUMP_LABEL (delay_jump_insn);
3442 if (ANY_RETURN_P (label))
3443 label = find_end_label (label);
3445 /* find_end_label can generate a new label. Check this first. */
3446 if (label
3447 && no_labels_between_p (insn, next)
3448 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3449 label, insn))
3451 /* Be careful how we do this to avoid deleting code or labels
3452 that are momentarily dead. See similar optimization in
3453 jump.c */
3454 if (old_label)
3455 ++LABEL_NUSES (old_label);
3457 if (invert_jump (delay_jump_insn, label, 1))
3459 int i;
3461 /* Must update the INSN_FROM_TARGET_P bits now that
3462 the branch is reversed, so that mark_target_live_regs
3463 will handle the delay slot insn correctly. */
3464 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3466 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3467 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3470 delete_related_insns (next);
3471 next = insn;
3474 if (old_label && --LABEL_NUSES (old_label) == 0)
3475 delete_related_insns (old_label);
3476 continue;
3480 /* If we own the thread opposite the way this insn branches, see if we
3481 can merge its delay slots with following insns. */
3482 if (INSN_FROM_TARGET_P (pat->insn (1))
3483 && own_thread_p (NEXT_INSN (insn), 0, 1))
3484 try_merge_delay_insns (insn, next);
3485 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3486 && own_thread_p (target_label, target_label, 0))
3487 try_merge_delay_insns (insn, next_active_insn (target_label));
3489 /* If we get here, we haven't deleted INSN. But we may have deleted
3490 NEXT, so recompute it. */
3491 next = next_active_insn (insn);
3496 /* Look for filled jumps to the end of function label. We can try to convert
3497 them into RETURN insns if the insns in the delay slot are valid for the
3498 RETURN as well. */
3500 static void
3501 make_return_insns (rtx_insn *first)
3503 rtx_insn *insn;
3504 rtx_jump_insn *jump_insn;
3505 rtx real_return_label = function_return_label;
3506 rtx real_simple_return_label = function_simple_return_label;
3507 int slots, i;
3509 /* See if there is a RETURN insn in the function other than the one we
3510 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3511 into a RETURN to jump to it. */
3512 for (insn = first; insn; insn = NEXT_INSN (insn))
3513 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3515 rtx t = get_label_before (insn, NULL_RTX);
3516 if (PATTERN (insn) == ret_rtx)
3517 real_return_label = t;
3518 else
3519 real_simple_return_label = t;
3520 break;
3523 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3524 was equal to END_OF_FUNCTION_LABEL. */
3525 if (real_return_label)
3526 LABEL_NUSES (real_return_label)++;
3527 if (real_simple_return_label)
3528 LABEL_NUSES (real_simple_return_label)++;
3530 /* Clear the list of insns to fill so we can use it. */
3531 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3533 for (insn = first; insn; insn = NEXT_INSN (insn))
3535 int flags;
3536 rtx kind, real_label;
3538 /* Only look at filled JUMP_INSNs that go to the end of function
3539 label. */
3540 if (!NONJUMP_INSN_P (insn))
3541 continue;
3543 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3544 continue;
3546 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3548 if (!jump_to_label_p (pat->insn (0)))
3549 continue;
3551 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3553 kind = ret_rtx;
3554 real_label = real_return_label;
3556 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3558 kind = simple_return_rtx;
3559 real_label = real_simple_return_label;
3561 else
3562 continue;
3564 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3566 /* If we can't make the jump into a RETURN, try to redirect it to the best
3567 RETURN and go on to the next insn. */
3568 if (!reorg_redirect_jump (jump_insn, kind))
3570 /* Make sure redirecting the jump will not invalidate the delay
3571 slot insns. */
3572 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3573 reorg_redirect_jump (jump_insn, real_label);
3574 continue;
3577 /* See if this RETURN can accept the insns current in its delay slot.
3578 It can if it has more or an equal number of slots and the contents
3579 of each is valid. */
3581 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3582 slots = num_delay_slots (jump_insn);
3583 if (slots >= XVECLEN (pat, 0) - 1)
3585 for (i = 1; i < XVECLEN (pat, 0); i++)
3586 if (! (
3587 #if ANNUL_IFFALSE_SLOTS
3588 (INSN_ANNULLED_BRANCH_P (jump_insn)
3589 && INSN_FROM_TARGET_P (pat->insn (i)))
3590 ? eligible_for_annul_false (jump_insn, i - 1,
3591 pat->insn (i), flags) :
3592 #endif
3593 #if ANNUL_IFTRUE_SLOTS
3594 (INSN_ANNULLED_BRANCH_P (jump_insn)
3595 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3596 ? eligible_for_annul_true (jump_insn, i - 1,
3597 pat->insn (i), flags) :
3598 #endif
3599 eligible_for_delay (jump_insn, i - 1,
3600 pat->insn (i), flags)))
3601 break;
3603 else
3604 i = 0;
3606 if (i == XVECLEN (pat, 0))
3607 continue;
3609 /* We have to do something with this insn. If it is an unconditional
3610 RETURN, delete the SEQUENCE and output the individual insns,
3611 followed by the RETURN. Then set things up so we try to find
3612 insns for its delay slots, if it needs some. */
3613 if (ANY_RETURN_P (PATTERN (jump_insn)))
3615 rtx_insn *prev = PREV_INSN (insn);
3617 delete_related_insns (insn);
3618 for (i = 1; i < XVECLEN (pat, 0); i++)
3619 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3621 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3622 emit_barrier_after (insn);
3624 if (slots)
3625 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3627 else
3628 /* It is probably more efficient to keep this with its current
3629 delay slot as a branch to a RETURN. */
3630 reorg_redirect_jump (jump_insn, real_label);
3633 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3634 new delay slots we have created. */
3635 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3636 delete_related_insns (real_return_label);
3637 if (real_simple_return_label != NULL_RTX
3638 && --LABEL_NUSES (real_simple_return_label) == 0)
3639 delete_related_insns (real_simple_return_label);
3641 fill_simple_delay_slots (1);
3642 fill_simple_delay_slots (0);
3645 /* Try to find insns to place in delay slots. */
3647 static void
3648 dbr_schedule (rtx_insn *first)
3650 rtx_insn *insn, *next, *epilogue_insn = 0;
3651 int i;
3652 bool need_return_insns;
3654 /* If the current function has no insns other than the prologue and
3655 epilogue, then do not try to fill any delay slots. */
3656 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3657 return;
3659 /* Find the highest INSN_UID and allocate and initialize our map from
3660 INSN_UID's to position in code. */
3661 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3663 if (INSN_UID (insn) > max_uid)
3664 max_uid = INSN_UID (insn);
3665 if (NOTE_P (insn)
3666 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3667 epilogue_insn = insn;
3670 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3671 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3672 uid_to_ruid[INSN_UID (insn)] = i;
3674 /* Initialize the list of insns that need filling. */
3675 if (unfilled_firstobj == 0)
3677 gcc_obstack_init (&unfilled_slots_obstack);
3678 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3681 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3683 rtx target;
3685 /* Skip vector tables. We can't get attributes for them. */
3686 if (JUMP_TABLE_DATA_P (insn))
3687 continue;
3689 if (JUMP_P (insn))
3690 INSN_ANNULLED_BRANCH_P (insn) = 0;
3691 INSN_FROM_TARGET_P (insn) = 0;
3693 if (num_delay_slots (insn) > 0)
3694 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3696 /* Ensure all jumps go to the last of a set of consecutive labels. */
3697 if (JUMP_P (insn)
3698 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3699 && !ANY_RETURN_P (JUMP_LABEL (insn))
3700 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3701 != JUMP_LABEL (insn)))
3702 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3705 init_resource_info (epilogue_insn);
3707 /* Show we haven't computed an end-of-function label yet. */
3708 function_return_label = function_simple_return_label = NULL;
3710 /* Initialize the statistics for this function. */
3711 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3712 memset (num_filled_delays, 0, sizeof num_filled_delays);
3714 /* Now do the delay slot filling. Try everything twice in case earlier
3715 changes make more slots fillable. */
3717 for (reorg_pass_number = 0;
3718 reorg_pass_number < MAX_REORG_PASSES;
3719 reorg_pass_number++)
3721 fill_simple_delay_slots (1);
3722 fill_simple_delay_slots (0);
3723 if (!targetm.no_speculation_in_delay_slots_p ())
3724 fill_eager_delay_slots ();
3725 relax_delay_slots (first);
3728 /* If we made an end of function label, indicate that it is now
3729 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3730 If it is now unused, delete it. */
3731 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3732 delete_related_insns (function_return_label);
3733 if (function_simple_return_label
3734 && --LABEL_NUSES (function_simple_return_label) == 0)
3735 delete_related_insns (function_simple_return_label);
3737 need_return_insns = false;
3738 need_return_insns |= targetm.have_return () && function_return_label != 0;
3739 need_return_insns |= (targetm.have_simple_return ()
3740 && function_simple_return_label != 0);
3741 if (need_return_insns)
3742 make_return_insns (first);
3744 /* Delete any USE insns made by update_block; subsequent passes don't need
3745 them or know how to deal with them. */
3746 for (insn = first; insn; insn = next)
3748 next = NEXT_INSN (insn);
3750 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3751 && INSN_P (XEXP (PATTERN (insn), 0)))
3752 next = delete_related_insns (insn);
3755 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3757 /* It is not clear why the line below is needed, but it does seem to be. */
3758 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3760 if (dump_file)
3762 int i, j, need_comma;
3763 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3764 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3766 for (reorg_pass_number = 0;
3767 reorg_pass_number < MAX_REORG_PASSES;
3768 reorg_pass_number++)
3770 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3771 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3773 need_comma = 0;
3774 fprintf (dump_file, ";; Reorg function #%d\n", i);
3776 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3777 num_insns_needing_delays[i][reorg_pass_number]);
3779 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3780 if (num_filled_delays[i][j][reorg_pass_number])
3782 if (need_comma)
3783 fprintf (dump_file, ", ");
3784 need_comma = 1;
3785 fprintf (dump_file, "%d got %d delays",
3786 num_filled_delays[i][j][reorg_pass_number], j);
3788 fprintf (dump_file, "\n");
3791 memset (total_delay_slots, 0, sizeof total_delay_slots);
3792 memset (total_annul_slots, 0, sizeof total_annul_slots);
3793 for (insn = first; insn; insn = NEXT_INSN (insn))
3795 if (! insn->deleted ()
3796 && NONJUMP_INSN_P (insn)
3797 && GET_CODE (PATTERN (insn)) != USE
3798 && GET_CODE (PATTERN (insn)) != CLOBBER)
3800 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3802 rtx control;
3803 j = XVECLEN (PATTERN (insn), 0) - 1;
3804 if (j > MAX_DELAY_HISTOGRAM)
3805 j = MAX_DELAY_HISTOGRAM;
3806 control = XVECEXP (PATTERN (insn), 0, 0);
3807 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3808 total_annul_slots[j]++;
3809 else
3810 total_delay_slots[j]++;
3812 else if (num_delay_slots (insn) > 0)
3813 total_delay_slots[0]++;
3816 fprintf (dump_file, ";; Reorg totals: ");
3817 need_comma = 0;
3818 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3820 if (total_delay_slots[j])
3822 if (need_comma)
3823 fprintf (dump_file, ", ");
3824 need_comma = 1;
3825 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3828 fprintf (dump_file, "\n");
3830 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3832 fprintf (dump_file, ";; Reorg annuls: ");
3833 need_comma = 0;
3834 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3836 if (total_annul_slots[j])
3838 if (need_comma)
3839 fprintf (dump_file, ", ");
3840 need_comma = 1;
3841 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3844 fprintf (dump_file, "\n");
3847 fprintf (dump_file, "\n");
3850 if (!sibling_labels.is_empty ())
3852 update_alignments (sibling_labels);
3853 sibling_labels.release ();
3856 free_resource_info ();
3857 free (uid_to_ruid);
3858 crtl->dbr_scheduled_p = true;
3861 /* Run delay slot optimization. */
3862 static unsigned int
3863 rest_of_handle_delay_slots (void)
3865 if (DELAY_SLOTS)
3866 dbr_schedule (get_insns ());
3868 return 0;
3871 namespace {
3873 const pass_data pass_data_delay_slots =
3875 RTL_PASS, /* type */
3876 "dbr", /* name */
3877 OPTGROUP_NONE, /* optinfo_flags */
3878 TV_DBR_SCHED, /* tv_id */
3879 0, /* properties_required */
3880 0, /* properties_provided */
3881 0, /* properties_destroyed */
3882 0, /* todo_flags_start */
3883 0, /* todo_flags_finish */
3886 class pass_delay_slots : public rtl_opt_pass
3888 public:
3889 pass_delay_slots (gcc::context *ctxt)
3890 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3893 /* opt_pass methods: */
3894 virtual bool gate (function *);
3895 virtual unsigned int execute (function *)
3897 return rest_of_handle_delay_slots ();
3900 }; // class pass_delay_slots
3902 bool
3903 pass_delay_slots::gate (function *)
3905 /* At -O0 dataflow info isn't updated after RA. */
3906 if (DELAY_SLOTS)
3907 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3909 return false;
3912 } // anon namespace
3914 rtl_opt_pass *
3915 make_pass_delay_slots (gcc::context *ctxt)
3917 return new pass_delay_slots (ctxt);
3920 /* Machine dependent reorg pass. */
3922 namespace {
3924 const pass_data pass_data_machine_reorg =
3926 RTL_PASS, /* type */
3927 "mach", /* name */
3928 OPTGROUP_NONE, /* optinfo_flags */
3929 TV_MACH_DEP, /* tv_id */
3930 0, /* properties_required */
3931 0, /* properties_provided */
3932 0, /* properties_destroyed */
3933 0, /* todo_flags_start */
3934 0, /* todo_flags_finish */
3937 class pass_machine_reorg : public rtl_opt_pass
3939 public:
3940 pass_machine_reorg (gcc::context *ctxt)
3941 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3944 /* opt_pass methods: */
3945 virtual bool gate (function *)
3947 return targetm.machine_dependent_reorg != 0;
3950 virtual unsigned int execute (function *)
3952 targetm.machine_dependent_reorg ();
3953 return 0;
3956 }; // class pass_machine_reorg
3958 } // anon namespace
3960 rtl_opt_pass *
3961 make_pass_machine_reorg (gcc::context *ctxt)
3963 return new pass_machine_reorg (ctxt);