PR target/81325
[official-gcc.git] / gcc / reorg.c
blob5914af655b6bbbddc98f23f53c0d1e6e496dc2eb
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "params.h"
120 #include "tree-pass.h"
123 /* First, some functions that were used before GCC got a control flow graph.
124 These functions are now only used here in reorg.c, and have therefore
125 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
127 /* Return the last label to mark the same position as LABEL. Return LABEL
128 itself if it is null or any return rtx. */
130 static rtx
131 skip_consecutive_labels (rtx label_or_return)
133 rtx_insn *insn;
135 if (label_or_return && ANY_RETURN_P (label_or_return))
136 return label_or_return;
138 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
140 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
141 if (LABEL_P (insn))
142 label = insn;
144 return label;
147 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
148 and REG_CC_USER notes so we can find it. */
150 static void
151 link_cc0_insns (rtx_insn *insn)
153 rtx user = next_nonnote_insn (insn);
155 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
156 user = XVECEXP (PATTERN (user), 0, 0);
158 add_reg_note (user, REG_CC_SETTER, insn);
159 add_reg_note (insn, REG_CC_USER, user);
162 /* Insns which have delay slots that have not yet been filled. */
164 static struct obstack unfilled_slots_obstack;
165 static rtx *unfilled_firstobj;
167 /* Define macros to refer to the first and last slot containing unfilled
168 insns. These are used because the list may move and its address
169 should be recomputed at each use. */
171 #define unfilled_slots_base \
172 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
174 #define unfilled_slots_next \
175 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
177 /* Points to the label before the end of the function, or before a
178 return insn. */
179 static rtx_code_label *function_return_label;
180 /* Likewise for a simple_return. */
181 static rtx_code_label *function_simple_return_label;
183 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
184 not always monotonically increase. */
185 static int *uid_to_ruid;
187 /* Highest valid index in `uid_to_ruid'. */
188 static int max_uid;
190 static int stop_search_p (rtx_insn *, int);
191 static int resource_conflicts_p (struct resources *, struct resources *);
192 static int insn_references_resource_p (rtx, struct resources *, bool);
193 static int insn_sets_resource_p (rtx, struct resources *, bool);
194 static rtx_code_label *find_end_label (rtx);
195 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
196 int);
197 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
198 static rtx_insn *delete_from_delay_slot (rtx_insn *);
199 static void delete_scheduled_jump (rtx_insn *);
200 static void note_delay_statistics (int, int);
201 static int get_jump_flags (const rtx_insn *, rtx);
202 static int mostly_true_jump (rtx);
203 static rtx get_branch_condition (const rtx_insn *, rtx);
204 static int condition_dominates_p (rtx, const rtx_insn *);
205 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
206 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
207 const vec<rtx_insn *> &);
208 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
209 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
210 vec<rtx_insn *> *,
211 struct resources *,
212 struct resources *,
213 struct resources *,
214 int, int *, int *,
215 rtx *);
216 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
217 vec<rtx_insn *> *,
218 struct resources *,
219 struct resources *,
220 struct resources *,
221 int, int *, int *);
222 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
223 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
224 static int own_thread_p (rtx, rtx, int);
225 static void update_block (rtx_insn *, rtx_insn *);
226 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
227 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
228 static void fix_reg_dead_note (rtx_insn *, rtx);
229 static void update_reg_unused_notes (rtx_insn *, rtx);
230 static void fill_simple_delay_slots (int);
231 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
232 int, int, int, int,
233 int *, vec<rtx_insn *> *);
234 static void fill_eager_delay_slots (void);
235 static void relax_delay_slots (rtx_insn *);
236 static void make_return_insns (rtx_insn *);
238 /* A wrapper around next_active_insn which takes care to return ret_rtx
239 unchanged. */
241 static rtx
242 first_active_target_insn (rtx insn)
244 if (ANY_RETURN_P (insn))
245 return insn;
246 return next_active_insn (as_a <rtx_insn *> (insn));
249 /* Return true iff INSN is a simplejump, or any kind of return insn. */
251 static bool
252 simplejump_or_return_p (rtx insn)
254 return (JUMP_P (insn)
255 && (simplejump_p (as_a <rtx_insn *> (insn))
256 || ANY_RETURN_P (PATTERN (insn))));
259 /* Return TRUE if this insn should stop the search for insn to fill delay
260 slots. LABELS_P indicates that labels should terminate the search.
261 In all cases, jumps terminate the search. */
263 static int
264 stop_search_p (rtx_insn *insn, int labels_p)
266 if (insn == 0)
267 return 1;
269 /* If the insn can throw an exception that is caught within the function,
270 it may effectively perform a jump from the viewpoint of the function.
271 Therefore act like for a jump. */
272 if (can_throw_internal (insn))
273 return 1;
275 switch (GET_CODE (insn))
277 case NOTE:
278 case CALL_INSN:
279 return 0;
281 case CODE_LABEL:
282 return labels_p;
284 case JUMP_INSN:
285 case BARRIER:
286 return 1;
288 case INSN:
289 /* OK unless it contains a delay slot or is an `asm' insn of some type.
290 We don't know anything about these. */
291 return (GET_CODE (PATTERN (insn)) == SEQUENCE
292 || GET_CODE (PATTERN (insn)) == ASM_INPUT
293 || asm_noperands (PATTERN (insn)) >= 0);
295 default:
296 gcc_unreachable ();
300 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
301 resource set contains a volatile memory reference. Otherwise, return FALSE. */
303 static int
304 resource_conflicts_p (struct resources *res1, struct resources *res2)
306 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
307 || res1->volatil || res2->volatil)
308 return 1;
310 return hard_reg_set_intersect_p (res1->regs, res2->regs);
313 /* Return TRUE if any resource marked in RES, a `struct resources', is
314 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
315 routine is using those resources.
317 We compute this by computing all the resources referenced by INSN and
318 seeing if this conflicts with RES. It might be faster to directly check
319 ourselves, and this is the way it used to work, but it means duplicating
320 a large block of complex code. */
322 static int
323 insn_references_resource_p (rtx insn, struct resources *res,
324 bool include_delayed_effects)
326 struct resources insn_res;
328 CLEAR_RESOURCE (&insn_res);
329 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
330 return resource_conflicts_p (&insn_res, res);
333 /* Return TRUE if INSN modifies resources that are marked in RES.
334 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
335 included. CC0 is only modified if it is explicitly set; see comments
336 in front of mark_set_resources for details. */
338 static int
339 insn_sets_resource_p (rtx insn, struct resources *res,
340 bool include_delayed_effects)
342 struct resources insn_sets;
344 CLEAR_RESOURCE (&insn_sets);
345 mark_set_resources (insn, &insn_sets, 0,
346 (include_delayed_effects
347 ? MARK_SRC_DEST_CALL
348 : MARK_SRC_DEST));
349 return resource_conflicts_p (&insn_sets, res);
352 /* Find a label at the end of the function or before a RETURN. If there
353 is none, try to make one. If that fails, returns 0.
355 The property of such a label is that it is placed just before the
356 epilogue or a bare RETURN insn, so that another bare RETURN can be
357 turned into a jump to the label unconditionally. In particular, the
358 label cannot be placed before a RETURN insn with a filled delay slot.
360 ??? There may be a problem with the current implementation. Suppose
361 we start with a bare RETURN insn and call find_end_label. It may set
362 function_return_label just before the RETURN. Suppose the machinery
363 is able to fill the delay slot of the RETURN insn afterwards. Then
364 function_return_label is no longer valid according to the property
365 described above and find_end_label will still return it unmodified.
366 Note that this is probably mitigated by the following observation:
367 once function_return_label is made, it is very likely the target of
368 a jump, so filling the delay slot of the RETURN will be much more
369 difficult.
370 KIND is either simple_return_rtx or ret_rtx, indicating which type of
371 return we're looking for. */
373 static rtx_code_label *
374 find_end_label (rtx kind)
376 rtx_insn *insn;
377 rtx_code_label **plabel;
379 if (kind == ret_rtx)
380 plabel = &function_return_label;
381 else
383 gcc_assert (kind == simple_return_rtx);
384 plabel = &function_simple_return_label;
387 /* If we found one previously, return it. */
388 if (*plabel)
389 return *plabel;
391 /* Otherwise, see if there is a label at the end of the function. If there
392 is, it must be that RETURN insns aren't needed, so that is our return
393 label and we don't have to do anything else. */
395 insn = get_last_insn ();
396 while (NOTE_P (insn)
397 || (NONJUMP_INSN_P (insn)
398 && (GET_CODE (PATTERN (insn)) == USE
399 || GET_CODE (PATTERN (insn)) == CLOBBER)))
400 insn = PREV_INSN (insn);
402 /* When a target threads its epilogue we might already have a
403 suitable return insn. If so put a label before it for the
404 function_return_label. */
405 if (BARRIER_P (insn)
406 && JUMP_P (PREV_INSN (insn))
407 && PATTERN (PREV_INSN (insn)) == kind)
409 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
410 rtx_code_label *label = gen_label_rtx ();
411 LABEL_NUSES (label) = 0;
413 /* Put the label before any USE insns that may precede the RETURN
414 insn. */
415 while (GET_CODE (temp) == USE)
416 temp = PREV_INSN (temp);
418 emit_label_after (label, temp);
419 *plabel = label;
422 else if (LABEL_P (insn))
423 *plabel = as_a <rtx_code_label *> (insn);
424 else
426 rtx_code_label *label = gen_label_rtx ();
427 LABEL_NUSES (label) = 0;
428 /* If the basic block reorder pass moves the return insn to
429 some other place try to locate it again and put our
430 function_return_label there. */
431 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
432 insn = PREV_INSN (insn);
433 if (insn)
435 insn = PREV_INSN (insn);
437 /* Put the label before any USE insns that may precede the
438 RETURN insn. */
439 while (GET_CODE (insn) == USE)
440 insn = PREV_INSN (insn);
442 emit_label_after (label, insn);
444 else
446 if (targetm.have_epilogue () && ! targetm.have_return ())
447 /* The RETURN insn has its delay slot filled so we cannot
448 emit the label just before it. Since we already have
449 an epilogue and cannot emit a new RETURN, we cannot
450 emit the label at all. */
451 return NULL;
453 /* Otherwise, make a new label and emit a RETURN and BARRIER,
454 if needed. */
455 emit_label (label);
456 if (targetm.have_return ())
458 /* The return we make may have delay slots too. */
459 rtx_insn *pat = targetm.gen_return ();
460 rtx_insn *insn = emit_jump_insn (pat);
461 set_return_jump_label (insn);
462 emit_barrier ();
463 if (num_delay_slots (insn) > 0)
464 obstack_ptr_grow (&unfilled_slots_obstack, insn);
467 *plabel = label;
470 /* Show one additional use for this label so it won't go away until
471 we are done. */
472 ++LABEL_NUSES (*plabel);
474 return *plabel;
477 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
478 the pattern of INSN with the SEQUENCE.
480 Returns the insn containing the SEQUENCE that replaces INSN. */
482 static rtx_insn *
483 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
485 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
486 rtvec seqv = rtvec_alloc (length + 1);
487 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
488 rtx_insn *seq_insn = make_insn_raw (seq);
490 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
491 not have a location, but one of the delayed insns does, we pick up a
492 location from there later. */
493 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
495 /* Unlink INSN from the insn chain, so that we can put it into
496 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
497 rtx_insn *after = PREV_INSN (insn);
498 remove_insn (insn);
499 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
501 /* Build our SEQUENCE and rebuild the insn chain. */
502 start_sequence ();
503 XVECEXP (seq, 0, 0) = emit_insn (insn);
505 unsigned int delay_insns = list.length ();
506 gcc_assert (delay_insns == (unsigned int) length);
507 for (unsigned int i = 0; i < delay_insns; i++)
509 rtx_insn *tem = list[i];
510 rtx note, next;
512 /* Show that this copy of the insn isn't deleted. */
513 tem->set_undeleted ();
515 /* Unlink insn from its original place, and re-emit it into
516 the sequence. */
517 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
518 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
520 /* SPARC assembler, for instance, emit warning when debug info is output
521 into the delay slot. */
522 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
523 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
524 INSN_LOCATION (tem) = 0;
526 for (note = REG_NOTES (tem); note; note = next)
528 next = XEXP (note, 1);
529 switch (REG_NOTE_KIND (note))
531 case REG_DEAD:
532 /* Remove any REG_DEAD notes because we can't rely on them now
533 that the insn has been moved. */
534 remove_note (tem, note);
535 break;
537 case REG_LABEL_OPERAND:
538 case REG_LABEL_TARGET:
539 /* Keep the label reference count up to date. */
540 if (LABEL_P (XEXP (note, 0)))
541 LABEL_NUSES (XEXP (note, 0)) ++;
542 break;
544 default:
545 break;
549 end_sequence ();
551 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
552 add_insn_after (seq_insn, after, NULL);
554 return seq_insn;
557 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
558 be in the order in which the insns are to be executed. */
560 static void
561 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
563 /* If INSN has its block number recorded, clear it since we may
564 be moving the insn to a new block. */
565 clear_hashed_info_for_insn (insn);
566 delay_list->safe_push (insn);
569 /* Delete INSN from the delay slot of the insn that it is in, which may
570 produce an insn with no delay slots. Return the new insn. */
572 static rtx_insn *
573 delete_from_delay_slot (rtx_insn *insn)
575 rtx_insn *trial, *seq_insn, *prev;
576 rtx_sequence *seq;
577 int i;
578 int had_barrier = 0;
580 /* We first must find the insn containing the SEQUENCE with INSN in its
581 delay slot. Do this by finding an insn, TRIAL, where
582 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
584 for (trial = insn;
585 PREV_INSN (NEXT_INSN (trial)) == trial;
586 trial = NEXT_INSN (trial))
589 seq_insn = PREV_INSN (NEXT_INSN (trial));
590 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
592 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
593 had_barrier = 1;
595 /* Create a delay list consisting of all the insns other than the one
596 we are deleting (unless we were the only one). */
597 auto_vec<rtx_insn *, 5> delay_list;
598 if (seq->len () > 2)
599 for (i = 1; i < seq->len (); i++)
600 if (seq->insn (i) != insn)
601 add_to_delay_list (seq->insn (i), &delay_list);
603 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
604 list, and rebuild the delay list if non-empty. */
605 prev = PREV_INSN (seq_insn);
606 trial = seq->insn (0);
607 delete_related_insns (seq_insn);
608 add_insn_after (trial, prev, NULL);
610 /* If there was a barrier after the old SEQUENCE, remit it. */
611 if (had_barrier)
612 emit_barrier_after (trial);
614 /* If there are any delay insns, remit them. Otherwise clear the
615 annul flag. */
616 if (!delay_list.is_empty ())
617 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
618 else if (JUMP_P (trial))
619 INSN_ANNULLED_BRANCH_P (trial) = 0;
621 INSN_FROM_TARGET_P (insn) = 0;
623 /* Show we need to fill this insn again. */
624 obstack_ptr_grow (&unfilled_slots_obstack, trial);
626 return trial;
629 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
630 the insn that sets CC0 for it and delete it too. */
632 static void
633 delete_scheduled_jump (rtx_insn *insn)
635 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
636 delete the insn that sets the condition code, but it is hard to find it.
637 Since this case is rare anyway, don't bother trying; there would likely
638 be other insns that became dead anyway, which we wouldn't know to
639 delete. */
641 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
643 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
645 /* If a reg-note was found, it points to an insn to set CC0. This
646 insn is in the delay list of some other insn. So delete it from
647 the delay list it was in. */
648 if (note)
650 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
651 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
652 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
654 else
656 /* The insn setting CC0 is our previous insn, but it may be in
657 a delay slot. It will be the last insn in the delay slot, if
658 it is. */
659 rtx_insn *trial = previous_insn (insn);
660 if (NOTE_P (trial))
661 trial = prev_nonnote_insn (trial);
662 if (sets_cc0_p (PATTERN (trial)) != 1
663 || FIND_REG_INC_NOTE (trial, NULL_RTX))
664 return;
665 if (PREV_INSN (NEXT_INSN (trial)) == trial)
666 delete_related_insns (trial);
667 else
668 delete_from_delay_slot (trial);
672 delete_related_insns (insn);
675 /* Counters for delay-slot filling. */
677 #define NUM_REORG_FUNCTIONS 2
678 #define MAX_DELAY_HISTOGRAM 3
679 #define MAX_REORG_PASSES 2
681 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
683 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
685 static int reorg_pass_number;
687 static void
688 note_delay_statistics (int slots_filled, int index)
690 num_insns_needing_delays[index][reorg_pass_number]++;
691 if (slots_filled > MAX_DELAY_HISTOGRAM)
692 slots_filled = MAX_DELAY_HISTOGRAM;
693 num_filled_delays[index][slots_filled][reorg_pass_number]++;
696 /* Optimize the following cases:
698 1. When a conditional branch skips over only one instruction,
699 use an annulling branch and put that insn in the delay slot.
700 Use either a branch that annuls when the condition if true or
701 invert the test with a branch that annuls when the condition is
702 false. This saves insns, since otherwise we must copy an insn
703 from the L1 target.
705 (orig) (skip) (otherwise)
706 Bcc.n L1 Bcc',a L1 Bcc,a L1'
707 insn insn insn2
708 L1: L1: L1:
709 insn2 insn2 insn2
710 insn3 insn3 L1':
711 insn3
713 2. When a conditional branch skips over only one instruction,
714 and after that, it unconditionally branches somewhere else,
715 perform the similar optimization. This saves executing the
716 second branch in the case where the inverted condition is true.
718 Bcc.n L1 Bcc',a L2
719 insn insn
720 L1: L1:
721 Bra L2 Bra L2
723 INSN is a JUMP_INSN.
725 This should be expanded to skip over N insns, where N is the number
726 of delay slots required. */
728 static void
729 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
731 rtx_insn *trial = next_nonnote_insn (insn);
732 rtx_insn *next_trial = next_active_insn (trial);
733 int flags;
735 flags = get_jump_flags (insn, JUMP_LABEL (insn));
737 if (trial == 0
738 || !NONJUMP_INSN_P (trial)
739 || GET_CODE (PATTERN (trial)) == SEQUENCE
740 || recog_memoized (trial) < 0
741 || (! eligible_for_annul_false (insn, 0, trial, flags)
742 && ! eligible_for_annul_true (insn, 0, trial, flags))
743 || RTX_FRAME_RELATED_P (trial)
744 || can_throw_internal (trial))
745 return;
747 /* There are two cases where we are just executing one insn (we assume
748 here that a branch requires only one insn; this should be generalized
749 at some point): Where the branch goes around a single insn or where
750 we have one insn followed by a branch to the same label we branch to.
751 In both of these cases, inverting the jump and annulling the delay
752 slot give the same effect in fewer insns. */
753 if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
754 || (next_trial != 0
755 && simplejump_or_return_p (next_trial)
756 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
758 if (eligible_for_annul_false (insn, 0, trial, flags))
760 if (invert_jump (insn, JUMP_LABEL (insn), 1))
761 INSN_FROM_TARGET_P (trial) = 1;
762 else if (! eligible_for_annul_true (insn, 0, trial, flags))
763 return;
766 add_to_delay_list (trial, delay_list);
767 next_trial = next_active_insn (trial);
768 update_block (trial, trial);
769 delete_related_insns (trial);
771 /* Also, if we are targeting an unconditional
772 branch, thread our jump to the target of that branch. Don't
773 change this into a RETURN here, because it may not accept what
774 we have in the delay slot. We'll fix this up later. */
775 if (next_trial && simplejump_or_return_p (next_trial))
777 rtx target_label = JUMP_LABEL (next_trial);
778 if (ANY_RETURN_P (target_label))
779 target_label = find_end_label (target_label);
781 if (target_label)
783 /* Recompute the flags based on TARGET_LABEL since threading
784 the jump to TARGET_LABEL may change the direction of the
785 jump (which may change the circumstances in which the
786 delay slot is nullified). */
787 flags = get_jump_flags (insn, target_label);
788 if (eligible_for_annul_true (insn, 0, trial, flags))
789 reorg_redirect_jump (insn, target_label);
793 INSN_ANNULLED_BRANCH_P (insn) = 1;
797 /* Encode and return branch direction and prediction information for
798 INSN assuming it will jump to LABEL.
800 Non conditional branches return no direction information and
801 are predicted as very likely taken. */
803 static int
804 get_jump_flags (const rtx_insn *insn, rtx label)
806 int flags;
808 /* get_jump_flags can be passed any insn with delay slots, these may
809 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
810 direction information, and only if they are conditional jumps.
812 If LABEL is a return, then there is no way to determine the branch
813 direction. */
814 if (JUMP_P (insn)
815 && (condjump_p (insn) || condjump_in_parallel_p (insn))
816 && !ANY_RETURN_P (label)
817 && INSN_UID (insn) <= max_uid
818 && INSN_UID (label) <= max_uid)
819 flags
820 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
821 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
822 /* No valid direction information. */
823 else
824 flags = 0;
826 return flags;
829 /* Return truth value of the statement that this branch
830 is mostly taken. If we think that the branch is extremely likely
831 to be taken, we return 2. If the branch is slightly more likely to be
832 taken, return 1. If the branch is slightly less likely to be taken,
833 return 0 and if the branch is highly unlikely to be taken, return -1. */
835 static int
836 mostly_true_jump (rtx jump_insn)
838 /* If branch probabilities are available, then use that number since it
839 always gives a correct answer. */
840 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
841 if (note)
843 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
844 .to_reg_br_prob_base ();
846 if (prob >= REG_BR_PROB_BASE * 9 / 10)
847 return 2;
848 else if (prob >= REG_BR_PROB_BASE / 2)
849 return 1;
850 else if (prob >= REG_BR_PROB_BASE / 10)
851 return 0;
852 else
853 return -1;
856 /* If there is no note, assume branches are not taken.
857 This should be rare. */
858 return 0;
861 /* Return the condition under which INSN will branch to TARGET. If TARGET
862 is zero, return the condition under which INSN will return. If INSN is
863 an unconditional branch, return const_true_rtx. If INSN isn't a simple
864 type of jump, or it doesn't go to TARGET, return 0. */
866 static rtx
867 get_branch_condition (const rtx_insn *insn, rtx target)
869 rtx pat = PATTERN (insn);
870 rtx src;
872 if (condjump_in_parallel_p (insn))
873 pat = XVECEXP (pat, 0, 0);
875 if (ANY_RETURN_P (pat) && pat == target)
876 return const_true_rtx;
878 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
879 return 0;
881 src = SET_SRC (pat);
882 if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
883 return const_true_rtx;
885 else if (GET_CODE (src) == IF_THEN_ELSE
886 && XEXP (src, 2) == pc_rtx
887 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
888 && label_ref_label (XEXP (src, 1)) == target)
889 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
890 return XEXP (src, 0);
892 else if (GET_CODE (src) == IF_THEN_ELSE
893 && XEXP (src, 1) == pc_rtx
894 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
895 && label_ref_label (XEXP (src, 2)) == target)
896 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
898 enum rtx_code rev;
899 rev = reversed_comparison_code (XEXP (src, 0), insn);
900 if (rev != UNKNOWN)
901 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
902 XEXP (XEXP (src, 0), 0),
903 XEXP (XEXP (src, 0), 1));
906 return 0;
909 /* Return nonzero if CONDITION is more strict than the condition of
910 INSN, i.e., if INSN will always branch if CONDITION is true. */
912 static int
913 condition_dominates_p (rtx condition, const rtx_insn *insn)
915 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
916 enum rtx_code code = GET_CODE (condition);
917 enum rtx_code other_code;
919 if (rtx_equal_p (condition, other_condition)
920 || other_condition == const_true_rtx)
921 return 1;
923 else if (condition == const_true_rtx || other_condition == 0)
924 return 0;
926 other_code = GET_CODE (other_condition);
927 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
928 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
929 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
930 return 0;
932 return comparison_dominates_p (code, other_code);
935 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
936 any insns already in the delay slot of JUMP. */
938 static int
939 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
941 int flags, i;
942 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
944 /* Make sure all the delay slots of this jump would still
945 be valid after threading the jump. If they are still
946 valid, then return nonzero. */
948 flags = get_jump_flags (jump, newlabel);
949 for (i = 1; i < pat->len (); i++)
950 if (! (
951 #if ANNUL_IFFALSE_SLOTS
952 (INSN_ANNULLED_BRANCH_P (jump)
953 && INSN_FROM_TARGET_P (pat->insn (i)))
954 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
955 #endif
956 #if ANNUL_IFTRUE_SLOTS
957 (INSN_ANNULLED_BRANCH_P (jump)
958 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
959 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
960 #endif
961 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
962 break;
964 return (i == pat->len ());
967 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
968 any insns we wish to place in the delay slot of JUMP. */
970 static int
971 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
972 const vec<rtx_insn *> &delay_list)
974 /* Make sure all the insns in DELAY_LIST would still be
975 valid after threading the jump. If they are still
976 valid, then return nonzero. */
978 int flags = get_jump_flags (jump, newlabel);
979 unsigned int delay_insns = delay_list.length ();
980 unsigned int i = 0;
981 for (; i < delay_insns; i++)
982 if (! (
983 #if ANNUL_IFFALSE_SLOTS
984 (INSN_ANNULLED_BRANCH_P (jump)
985 && INSN_FROM_TARGET_P (delay_list[i]))
986 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
987 #endif
988 #if ANNUL_IFTRUE_SLOTS
989 (INSN_ANNULLED_BRANCH_P (jump)
990 && ! INSN_FROM_TARGET_P (delay_list[i]))
991 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
992 #endif
993 eligible_for_delay (jump, i, delay_list[i], flags)))
994 break;
996 return i == delay_insns;
999 /* DELAY_LIST is a list of insns that have already been placed into delay
1000 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1001 If not, return 0; otherwise return 1. */
1003 static int
1004 check_annul_list_true_false (int annul_true_p,
1005 const vec<rtx_insn *> &delay_list)
1007 rtx_insn *trial;
1008 unsigned int i;
1009 FOR_EACH_VEC_ELT (delay_list, i, trial)
1010 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1011 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1012 return 0;
1014 return 1;
1017 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1018 the condition tested by INSN is CONDITION and the resources shown in
1019 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1020 from SEQ's delay list, in addition to whatever insns it may execute
1021 (in DELAY_LIST). SETS and NEEDED are denote resources already set and
1022 needed while searching for delay slot insns. Return the concatenated
1023 delay list if possible, otherwise, return 0.
1025 SLOTS_TO_FILL is the total number of slots required by INSN, and
1026 PSLOTS_FILLED points to the number filled so far (also the number of
1027 insns in DELAY_LIST). It is updated with the number that have been
1028 filled from the SEQUENCE, if any.
1030 PANNUL_P points to a nonzero value if we already know that we need
1031 to annul INSN. If this routine determines that annulling is needed,
1032 it may set that value nonzero.
1034 PNEW_THREAD points to a location that is to receive the place at which
1035 execution should continue. */
1037 static void
1038 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1039 vec<rtx_insn *> *delay_list, resources *sets,
1040 struct resources *needed,
1041 struct resources *other_needed,
1042 int slots_to_fill, int *pslots_filled,
1043 int *pannul_p, rtx *pnew_thread)
1045 int slots_remaining = slots_to_fill - *pslots_filled;
1046 int total_slots_filled = *pslots_filled;
1047 auto_vec<rtx_insn *, 5> new_delay_list;
1048 int must_annul = *pannul_p;
1049 int used_annul = 0;
1050 int i;
1051 struct resources cc_set;
1052 bool *redundant;
1054 /* We can't do anything if there are more delay slots in SEQ than we
1055 can handle, or if we don't know that it will be a taken branch.
1056 We know that it will be a taken branch if it is either an unconditional
1057 branch or a conditional branch with a stricter branch condition.
1059 Also, exit if the branch has more than one set, since then it is computing
1060 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1061 ??? It may be possible to move other sets into INSN in addition to
1062 moving the instructions in the delay slots.
1064 We can not steal the delay list if one of the instructions in the
1065 current delay_list modifies the condition codes and the jump in the
1066 sequence is a conditional jump. We can not do this because we can
1067 not change the direction of the jump because the condition codes
1068 will effect the direction of the jump in the sequence. */
1070 CLEAR_RESOURCE (&cc_set);
1072 rtx_insn *trial;
1073 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1075 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1076 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1077 return;
1080 if (XVECLEN (seq, 0) - 1 > slots_remaining
1081 || ! condition_dominates_p (condition, seq->insn (0))
1082 || ! single_set (seq->insn (0)))
1083 return;
1085 /* On some targets, branches with delay slots can have a limited
1086 displacement. Give the back end a chance to tell us we can't do
1087 this. */
1088 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1089 return;
1091 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1092 for (i = 1; i < seq->len (); i++)
1094 rtx_insn *trial = seq->insn (i);
1095 int flags;
1097 if (insn_references_resource_p (trial, sets, false)
1098 || insn_sets_resource_p (trial, needed, false)
1099 || insn_sets_resource_p (trial, sets, false)
1100 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1101 delay list. */
1102 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1103 /* If TRIAL is from the fallthrough code of an annulled branch insn
1104 in SEQ, we cannot use it. */
1105 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1106 && ! INSN_FROM_TARGET_P (trial)))
1107 return;
1109 /* If this insn was already done (usually in a previous delay slot),
1110 pretend we put it in our delay slot. */
1111 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1112 if (redundant[i])
1113 continue;
1115 /* We will end up re-vectoring this branch, so compute flags
1116 based on jumping to the new label. */
1117 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1119 if (! must_annul
1120 && ((condition == const_true_rtx
1121 || (! insn_sets_resource_p (trial, other_needed, false)
1122 && ! may_trap_or_fault_p (PATTERN (trial)))))
1123 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1124 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1125 && (must_annul = 1,
1126 check_annul_list_true_false (0, *delay_list)
1127 && check_annul_list_true_false (0, new_delay_list)
1128 && eligible_for_annul_false (insn, total_slots_filled,
1129 trial, flags)))
1131 if (must_annul)
1133 /* Frame related instructions cannot go into annulled delay
1134 slots, it messes up the dwarf info. */
1135 if (RTX_FRAME_RELATED_P (trial))
1136 return;
1137 used_annul = 1;
1139 rtx_insn *temp = copy_delay_slot_insn (trial);
1140 INSN_FROM_TARGET_P (temp) = 1;
1141 add_to_delay_list (temp, &new_delay_list);
1142 total_slots_filled++;
1144 if (--slots_remaining == 0)
1145 break;
1147 else
1148 return;
1151 /* Record the effect of the instructions that were redundant and which
1152 we therefore decided not to copy. */
1153 for (i = 1; i < seq->len (); i++)
1154 if (redundant[i])
1155 update_block (seq->insn (i), insn);
1157 /* Show the place to which we will be branching. */
1158 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1160 /* Add any new insns to the delay list and update the count of the
1161 number of slots filled. */
1162 *pslots_filled = total_slots_filled;
1163 if (used_annul)
1164 *pannul_p = 1;
1166 rtx_insn *temp;
1167 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1168 add_to_delay_list (temp, delay_list);
1171 /* Similar to steal_delay_list_from_target except that SEQ is on the
1172 fallthrough path of INSN. Here we only do something if the delay insn
1173 of SEQ is an unconditional branch. In that case we steal its delay slot
1174 for INSN since unconditional branches are much easier to fill. */
1176 static void
1177 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1178 rtx_sequence *seq,
1179 vec<rtx_insn *> *delay_list,
1180 struct resources *sets,
1181 struct resources *needed,
1182 struct resources *other_needed,
1183 int slots_to_fill, int *pslots_filled,
1184 int *pannul_p)
1186 int i;
1187 int flags;
1188 int must_annul = *pannul_p;
1189 int used_annul = 0;
1191 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1193 /* We can't do anything if SEQ's delay insn isn't an
1194 unconditional branch. */
1196 if (! simplejump_or_return_p (seq->insn (0)))
1197 return;
1199 for (i = 1; i < seq->len (); i++)
1201 rtx_insn *trial = seq->insn (i);
1203 /* If TRIAL sets CC0, stealing it will move it too far from the use
1204 of CC0. */
1205 if (insn_references_resource_p (trial, sets, false)
1206 || insn_sets_resource_p (trial, needed, false)
1207 || insn_sets_resource_p (trial, sets, false)
1208 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1210 break;
1212 /* If this insn was already done, we don't need it. */
1213 if (redundant_insn (trial, insn, *delay_list))
1215 update_block (trial, insn);
1216 delete_from_delay_slot (trial);
1217 continue;
1220 if (! must_annul
1221 && ((condition == const_true_rtx
1222 || (! insn_sets_resource_p (trial, other_needed, false)
1223 && ! may_trap_or_fault_p (PATTERN (trial)))))
1224 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1225 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1226 check_annul_list_true_false (1, *delay_list)
1227 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1229 if (must_annul)
1230 used_annul = 1;
1231 delete_from_delay_slot (trial);
1232 add_to_delay_list (trial, delay_list);
1234 if (++(*pslots_filled) == slots_to_fill)
1235 break;
1237 else
1238 break;
1241 if (used_annul)
1242 *pannul_p = 1;
1245 /* Try merging insns starting at THREAD which match exactly the insns in
1246 INSN's delay list.
1248 If all insns were matched and the insn was previously annulling, the
1249 annul bit will be cleared.
1251 For each insn that is merged, if the branch is or will be non-annulling,
1252 we delete the merged insn. */
1254 static void
1255 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1257 rtx_insn *trial, *next_trial;
1258 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1259 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1260 int slot_number = 1;
1261 int num_slots = XVECLEN (PATTERN (insn), 0);
1262 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1263 struct resources set, needed, modified;
1264 auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1265 int flags;
1267 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1269 CLEAR_RESOURCE (&needed);
1270 CLEAR_RESOURCE (&set);
1272 /* If this is not an annulling branch, take into account anything needed in
1273 INSN's delay slot. This prevents two increments from being incorrectly
1274 folded into one. If we are annulling, this would be the correct
1275 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1276 will essentially disable this optimization. This method is somewhat of
1277 a kludge, but I don't see a better way.) */
1278 if (! annul_p)
1279 for (int i = 1; i < num_slots; i++)
1280 if (XVECEXP (PATTERN (insn), 0, i))
1281 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1282 true);
1284 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1286 rtx pat = PATTERN (trial);
1287 rtx oldtrial = trial;
1289 next_trial = next_nonnote_insn (trial);
1291 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1292 if (NONJUMP_INSN_P (trial)
1293 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1294 continue;
1296 if (GET_CODE (next_to_match) == GET_CODE (trial)
1297 /* We can't share an insn that sets cc0. */
1298 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1299 && ! insn_references_resource_p (trial, &set, true)
1300 && ! insn_sets_resource_p (trial, &set, true)
1301 && ! insn_sets_resource_p (trial, &needed, true)
1302 && (trial = try_split (pat, trial, 0)) != 0
1303 /* Update next_trial, in case try_split succeeded. */
1304 && (next_trial = next_nonnote_insn (trial))
1305 /* Likewise THREAD. */
1306 && (thread = oldtrial == thread ? trial : thread)
1307 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1308 /* Have to test this condition if annul condition is different
1309 from (and less restrictive than) non-annulling one. */
1310 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1313 if (! annul_p)
1315 update_block (trial, thread);
1316 if (trial == thread)
1317 thread = next_active_insn (thread);
1319 delete_related_insns (trial);
1320 INSN_FROM_TARGET_P (next_to_match) = 0;
1322 else
1323 merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1325 if (++slot_number == num_slots)
1326 break;
1328 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1331 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1332 mark_referenced_resources (trial, &needed, true);
1335 /* See if we stopped on a filled insn. If we did, try to see if its
1336 delay slots match. */
1337 if (slot_number != num_slots
1338 && trial && NONJUMP_INSN_P (trial)
1339 && GET_CODE (PATTERN (trial)) == SEQUENCE
1340 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1341 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1343 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1344 rtx filled_insn = XVECEXP (pat, 0, 0);
1346 /* Account for resources set/needed by the filled insn. */
1347 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1348 mark_referenced_resources (filled_insn, &needed, true);
1350 for (int i = 1; i < pat->len (); i++)
1352 rtx_insn *dtrial = pat->insn (i);
1354 CLEAR_RESOURCE (&modified);
1355 /* Account for resources set by the insn following NEXT_TO_MATCH
1356 inside INSN's delay list. */
1357 for (int j = 1; slot_number + j < num_slots; j++)
1358 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1359 &modified, 0, MARK_SRC_DEST_CALL);
1360 /* Account for resources set by the insn before DTRIAL and inside
1361 TRIAL's delay list. */
1362 for (int j = 1; j < i; j++)
1363 mark_set_resources (XVECEXP (pat, 0, j),
1364 &modified, 0, MARK_SRC_DEST_CALL);
1365 if (! insn_references_resource_p (dtrial, &set, true)
1366 && ! insn_sets_resource_p (dtrial, &set, true)
1367 && ! insn_sets_resource_p (dtrial, &needed, true)
1368 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1369 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1370 /* Check that DTRIAL and NEXT_TO_MATCH does not reference a
1371 resource modified between them (only dtrial is checked because
1372 next_to_match and dtrial shall to be equal in order to hit
1373 this line) */
1374 && ! insn_references_resource_p (dtrial, &modified, true)
1375 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1377 if (! annul_p)
1379 rtx_insn *new_rtx;
1381 update_block (dtrial, thread);
1382 new_rtx = delete_from_delay_slot (dtrial);
1383 if (thread->deleted ())
1384 thread = new_rtx;
1385 INSN_FROM_TARGET_P (next_to_match) = 0;
1387 else
1388 merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1389 true));
1391 if (++slot_number == num_slots)
1392 break;
1394 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1396 else
1398 /* Keep track of the set/referenced resources for the delay
1399 slots of any trial insns we encounter. */
1400 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1401 mark_referenced_resources (dtrial, &needed, true);
1406 /* If all insns in the delay slot have been matched and we were previously
1407 annulling the branch, we need not any more. In that case delete all the
1408 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1409 the delay list so that we know that it isn't only being used at the
1410 target. */
1411 if (slot_number == num_slots && annul_p)
1413 unsigned int len = merged_insns.length ();
1414 for (unsigned int i = len - 1; i < len; i--)
1415 if (merged_insns[i].second)
1417 update_block (merged_insns[i].first, thread);
1418 rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1419 if (thread->deleted ())
1420 thread = new_rtx;
1422 else
1424 update_block (merged_insns[i].first, thread);
1425 delete_related_insns (merged_insns[i].first);
1428 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1430 for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1431 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1435 /* See if INSN is redundant with an insn in front of TARGET. Often this
1436 is called when INSN is a candidate for a delay slot of TARGET.
1437 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1438 of INSN. Often INSN will be redundant with an insn in a delay slot of
1439 some previous insn. This happens when we have a series of branches to the
1440 same label; in that case the first insn at the target might want to go
1441 into each of the delay slots.
1443 If we are not careful, this routine can take up a significant fraction
1444 of the total compilation time (4%), but only wins rarely. Hence we
1445 speed this routine up by making two passes. The first pass goes back
1446 until it hits a label and sees if it finds an insn with an identical
1447 pattern. Only in this (relatively rare) event does it check for
1448 data conflicts.
1450 We do not split insns we encounter. This could cause us not to find a
1451 redundant insn, but the cost of splitting seems greater than the possible
1452 gain in rare cases. */
1454 static rtx_insn *
1455 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1457 rtx target_main = target;
1458 rtx ipat = PATTERN (insn);
1459 rtx_insn *trial;
1460 rtx pat;
1461 struct resources needed, set;
1462 int i;
1463 unsigned insns_to_search;
1465 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1466 are allowed to not actually assign to such a register. */
1467 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1468 return 0;
1470 /* Scan backwards looking for a match. */
1471 for (trial = PREV_INSN (target),
1472 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1473 trial && insns_to_search > 0;
1474 trial = PREV_INSN (trial))
1476 /* (use (insn))s can come immediately after a barrier if the
1477 label that used to precede them has been deleted as dead.
1478 See delete_related_insns. */
1479 if (LABEL_P (trial) || BARRIER_P (trial))
1480 return 0;
1482 if (!INSN_P (trial))
1483 continue;
1484 --insns_to_search;
1486 pat = PATTERN (trial);
1487 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1488 continue;
1490 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1492 /* Stop for a CALL and its delay slots because it is difficult to
1493 track its resource needs correctly. */
1494 if (CALL_P (seq->element (0)))
1495 return 0;
1497 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1498 slots because it is difficult to track its resource needs
1499 correctly. */
1501 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1502 return 0;
1504 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1505 return 0;
1507 /* See if any of the insns in the delay slot match, updating
1508 resource requirements as we go. */
1509 for (i = seq->len () - 1; i > 0; i--)
1510 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1511 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1512 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1513 break;
1515 /* If found a match, exit this loop early. */
1516 if (i > 0)
1517 break;
1520 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1521 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1522 break;
1525 /* If we didn't find an insn that matches, return 0. */
1526 if (trial == 0)
1527 return 0;
1529 /* See what resources this insn sets and needs. If they overlap, or
1530 if this insn references CC0, it can't be redundant. */
1532 CLEAR_RESOURCE (&needed);
1533 CLEAR_RESOURCE (&set);
1534 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1535 mark_referenced_resources (insn, &needed, true);
1537 /* If TARGET is a SEQUENCE, get the main insn. */
1538 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1539 target_main = XVECEXP (PATTERN (target), 0, 0);
1541 if (resource_conflicts_p (&needed, &set)
1542 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1543 /* The insn requiring the delay may not set anything needed or set by
1544 INSN. */
1545 || insn_sets_resource_p (target_main, &needed, true)
1546 || insn_sets_resource_p (target_main, &set, true))
1547 return 0;
1549 /* Insns we pass may not set either NEEDED or SET, so merge them for
1550 simpler tests. */
1551 needed.memory |= set.memory;
1552 IOR_HARD_REG_SET (needed.regs, set.regs);
1554 /* This insn isn't redundant if it conflicts with an insn that either is
1555 or will be in a delay slot of TARGET. */
1557 unsigned int j;
1558 rtx_insn *temp;
1559 FOR_EACH_VEC_ELT (delay_list, j, temp)
1560 if (insn_sets_resource_p (temp, &needed, true))
1561 return 0;
1563 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1564 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1565 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1566 true))
1567 return 0;
1569 /* Scan backwards until we reach a label or an insn that uses something
1570 INSN sets or sets something insn uses or sets. */
1572 for (trial = PREV_INSN (target),
1573 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1574 trial && !LABEL_P (trial) && insns_to_search > 0;
1575 trial = PREV_INSN (trial))
1577 if (!INSN_P (trial))
1578 continue;
1579 --insns_to_search;
1581 pat = PATTERN (trial);
1582 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1583 continue;
1585 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1587 bool annul_p = false;
1588 rtx_insn *control = seq->insn (0);
1590 /* If this is a CALL_INSN and its delay slots, it is hard to track
1591 the resource needs properly, so give up. */
1592 if (CALL_P (control))
1593 return 0;
1595 /* If this is an INSN or JUMP_INSN with delayed effects, it
1596 is hard to track the resource needs properly, so give up. */
1598 if (INSN_SETS_ARE_DELAYED (control))
1599 return 0;
1601 if (INSN_REFERENCES_ARE_DELAYED (control))
1602 return 0;
1604 if (JUMP_P (control))
1605 annul_p = INSN_ANNULLED_BRANCH_P (control);
1607 /* See if any of the insns in the delay slot match, updating
1608 resource requirements as we go. */
1609 for (i = seq->len () - 1; i > 0; i--)
1611 rtx_insn *candidate = seq->insn (i);
1613 /* If an insn will be annulled if the branch is false, it isn't
1614 considered as a possible duplicate insn. */
1615 if (rtx_equal_p (PATTERN (candidate), ipat)
1616 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1618 /* Show that this insn will be used in the sequel. */
1619 INSN_FROM_TARGET_P (candidate) = 0;
1620 return candidate;
1623 /* Unless this is an annulled insn from the target of a branch,
1624 we must stop if it sets anything needed or set by INSN. */
1625 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1626 && insn_sets_resource_p (candidate, &needed, true))
1627 return 0;
1630 /* If the insn requiring the delay slot conflicts with INSN, we
1631 must stop. */
1632 if (insn_sets_resource_p (control, &needed, true))
1633 return 0;
1635 else
1637 /* See if TRIAL is the same as INSN. */
1638 pat = PATTERN (trial);
1639 if (rtx_equal_p (pat, ipat))
1640 return trial;
1642 /* Can't go any further if TRIAL conflicts with INSN. */
1643 if (insn_sets_resource_p (trial, &needed, true))
1644 return 0;
1648 return 0;
1651 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1652 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1653 is nonzero, we are allowed to fall into this thread; otherwise, we are
1654 not.
1656 If LABEL is used more than one or we pass a label other than LABEL before
1657 finding an active insn, we do not own this thread. */
1659 static int
1660 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1662 rtx_insn *active_insn;
1663 rtx_insn *insn;
1665 /* We don't own the function end. */
1666 if (thread == 0 || ANY_RETURN_P (thread))
1667 return 0;
1669 /* We have a non-NULL insn. */
1670 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1672 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1673 active_insn = next_active_insn (PREV_INSN (thread_insn));
1675 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1676 if (LABEL_P (insn)
1677 && (insn != label || LABEL_NUSES (insn) != 1))
1678 return 0;
1680 if (allow_fallthrough)
1681 return 1;
1683 /* Ensure that we reach a BARRIER before any insn or label. */
1684 for (insn = prev_nonnote_insn (thread_insn);
1685 insn == 0 || !BARRIER_P (insn);
1686 insn = prev_nonnote_insn (insn))
1687 if (insn == 0
1688 || LABEL_P (insn)
1689 || (NONJUMP_INSN_P (insn)
1690 && GET_CODE (PATTERN (insn)) != USE
1691 && GET_CODE (PATTERN (insn)) != CLOBBER))
1692 return 0;
1694 return 1;
1697 /* Called when INSN is being moved from a location near the target of a jump.
1698 We leave a marker of the form (use (INSN)) immediately in front of WHERE
1699 for mark_target_live_regs. These markers will be deleted at the end.
1701 We used to try to update the live status of registers if WHERE is at
1702 the start of a basic block, but that can't work since we may remove a
1703 BARRIER in relax_delay_slots. */
1705 static void
1706 update_block (rtx_insn *insn, rtx_insn *where)
1708 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1710 /* INSN might be making a value live in a block where it didn't use to
1711 be. So recompute liveness information for this block. */
1712 incr_ticks_for_insn (insn);
1715 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1716 the basic block containing the jump. */
1718 static int
1719 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1721 incr_ticks_for_insn (jump);
1722 return redirect_jump (jump, nlabel, 1);
1725 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1726 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1727 that reference values used in INSN. If we find one, then we move the
1728 REG_DEAD note to INSN.
1730 This is needed to handle the case where a later insn (after INSN) has a
1731 REG_DEAD note for a register used by INSN, and this later insn subsequently
1732 gets moved before a CODE_LABEL because it is a redundant insn. In this
1733 case, mark_target_live_regs may be confused into thinking the register
1734 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1736 static void
1737 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1739 rtx link, next;
1740 rtx_insn *p;
1742 for (p = next_nonnote_insn (insn); p != delayed_insn;
1743 p = next_nonnote_insn (p))
1744 for (link = REG_NOTES (p); link; link = next)
1746 next = XEXP (link, 1);
1748 if (REG_NOTE_KIND (link) != REG_DEAD
1749 || !REG_P (XEXP (link, 0)))
1750 continue;
1752 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1754 /* Move the REG_DEAD note from P to INSN. */
1755 remove_note (p, link);
1756 XEXP (link, 1) = REG_NOTES (insn);
1757 REG_NOTES (insn) = link;
1762 /* Called when an insn redundant with start_insn is deleted. If there
1763 is a REG_DEAD note for the target of start_insn between start_insn
1764 and stop_insn, then the REG_DEAD note needs to be deleted since the
1765 value no longer dies there.
1767 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1768 confused into thinking the register is dead. */
1770 static void
1771 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1773 rtx link, next;
1774 rtx_insn *p;
1776 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1777 p = next_nonnote_insn (p))
1778 for (link = REG_NOTES (p); link; link = next)
1780 next = XEXP (link, 1);
1782 if (REG_NOTE_KIND (link) != REG_DEAD
1783 || !REG_P (XEXP (link, 0)))
1784 continue;
1786 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1788 remove_note (p, link);
1789 return;
1794 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1796 This handles the case of udivmodXi4 instructions which optimize their
1797 output depending on whether any REG_UNUSED notes are present.
1798 we must make sure that INSN calculates as many results as REDUNDANT_INSN
1799 does. */
1801 static void
1802 update_reg_unused_notes (rtx_insn *insn, rtx redundant_insn)
1804 rtx link, next;
1806 for (link = REG_NOTES (insn); link; link = next)
1808 next = XEXP (link, 1);
1810 if (REG_NOTE_KIND (link) != REG_UNUSED
1811 || !REG_P (XEXP (link, 0)))
1812 continue;
1814 if (! find_regno_note (redundant_insn, REG_UNUSED,
1815 REGNO (XEXP (link, 0))))
1816 remove_note (insn, link);
1820 static vec <rtx> sibling_labels;
1822 /* Return the label before INSN, or put a new label there. If SIBLING is
1823 non-zero, it is another label associated with the new label (if any),
1824 typically the former target of the jump that will be redirected to
1825 the new label. */
1827 static rtx_insn *
1828 get_label_before (rtx_insn *insn, rtx sibling)
1830 rtx_insn *label;
1832 /* Find an existing label at this point
1833 or make a new one if there is none. */
1834 label = prev_nonnote_insn (insn);
1836 if (label == 0 || !LABEL_P (label))
1838 rtx_insn *prev = PREV_INSN (insn);
1840 label = gen_label_rtx ();
1841 emit_label_after (label, prev);
1842 LABEL_NUSES (label) = 0;
1843 if (sibling)
1845 sibling_labels.safe_push (label);
1846 sibling_labels.safe_push (sibling);
1849 return label;
1852 /* Scan a function looking for insns that need a delay slot and find insns to
1853 put into the delay slot.
1855 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1856 as calls). We do these first since we don't want jump insns (that are
1857 easier to fill) to get the only insns that could be used for non-jump insns.
1858 When it is zero, only try to fill JUMP_INSNs.
1860 When slots are filled in this manner, the insns (including the
1861 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1862 it is possible to tell whether a delay slot has really been filled
1863 or not. `final' knows how to deal with this, by communicating
1864 through FINAL_SEQUENCE. */
1866 static void
1867 fill_simple_delay_slots (int non_jumps_p)
1869 rtx_insn *insn, *trial, *next_trial;
1870 rtx pat;
1871 int i;
1872 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1873 struct resources needed, set;
1874 int slots_to_fill, slots_filled;
1875 auto_vec<rtx_insn *, 5> delay_list;
1877 for (i = 0; i < num_unfilled_slots; i++)
1879 int flags;
1880 /* Get the next insn to fill. If it has already had any slots assigned,
1881 we can't do anything with it. Maybe we'll improve this later. */
1883 insn = unfilled_slots_base[i];
1884 if (insn == 0
1885 || insn->deleted ()
1886 || (NONJUMP_INSN_P (insn)
1887 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1888 || (JUMP_P (insn) && non_jumps_p)
1889 || (!JUMP_P (insn) && ! non_jumps_p))
1890 continue;
1892 /* It may have been that this insn used to need delay slots, but
1893 now doesn't; ignore in that case. This can happen, for example,
1894 on the HP PA RISC, where the number of delay slots depends on
1895 what insns are nearby. */
1896 slots_to_fill = num_delay_slots (insn);
1898 /* Some machine description have defined instructions to have
1899 delay slots only in certain circumstances which may depend on
1900 nearby insns (which change due to reorg's actions).
1902 For example, the PA port normally has delay slots for unconditional
1903 jumps.
1905 However, the PA port claims such jumps do not have a delay slot
1906 if they are immediate successors of certain CALL_INSNs. This
1907 allows the port to favor filling the delay slot of the call with
1908 the unconditional jump. */
1909 if (slots_to_fill == 0)
1910 continue;
1912 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1913 says how many. After initialization, first try optimizing
1915 call _foo call _foo
1916 nop add %o7,.-L1,%o7
1917 b,a L1
1920 If this case applies, the delay slot of the call is filled with
1921 the unconditional jump. This is done first to avoid having the
1922 delay slot of the call filled in the backward scan. Also, since
1923 the unconditional jump is likely to also have a delay slot, that
1924 insn must exist when it is subsequently scanned.
1926 This is tried on each insn with delay slots as some machines
1927 have insns which perform calls, but are not represented as
1928 CALL_INSNs. */
1930 slots_filled = 0;
1931 delay_list.truncate (0);
1933 if (JUMP_P (insn))
1934 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1935 else
1936 flags = get_jump_flags (insn, NULL_RTX);
1938 if ((trial = next_active_insn (insn))
1939 && JUMP_P (trial)
1940 && simplejump_p (trial)
1941 && eligible_for_delay (insn, slots_filled, trial, flags)
1942 && no_labels_between_p (insn, trial)
1943 && ! can_throw_internal (trial))
1945 rtx_insn **tmp;
1946 slots_filled++;
1947 add_to_delay_list (trial, &delay_list);
1949 /* TRIAL may have had its delay slot filled, then unfilled. When
1950 the delay slot is unfilled, TRIAL is placed back on the unfilled
1951 slots obstack. Unfortunately, it is placed on the end of the
1952 obstack, not in its original location. Therefore, we must search
1953 from entry i + 1 to the end of the unfilled slots obstack to
1954 try and find TRIAL. */
1955 tmp = &unfilled_slots_base[i + 1];
1956 while (*tmp != trial && tmp != unfilled_slots_next)
1957 tmp++;
1959 /* Remove the unconditional jump from consideration for delay slot
1960 filling and unthread it. */
1961 if (*tmp == trial)
1962 *tmp = 0;
1964 rtx_insn *next = NEXT_INSN (trial);
1965 rtx_insn *prev = PREV_INSN (trial);
1966 if (prev)
1967 SET_NEXT_INSN (prev) = next;
1968 if (next)
1969 SET_PREV_INSN (next) = prev;
1973 /* Now, scan backwards from the insn to search for a potential
1974 delay-slot candidate. Stop searching when a label or jump is hit.
1976 For each candidate, if it is to go into the delay slot (moved
1977 forward in execution sequence), it must not need or set any resources
1978 that were set by later insns and must not set any resources that
1979 are needed for those insns.
1981 The delay slot insn itself sets resources unless it is a call
1982 (in which case the called routine, not the insn itself, is doing
1983 the setting). */
1985 if (slots_filled < slots_to_fill)
1987 /* If the flags register is dead after the insn, then we want to be
1988 able to accept a candidate that clobbers it. For this purpose,
1989 we need to filter the flags register during life analysis, so
1990 that it doesn't create RAW and WAW dependencies, while still
1991 creating the necessary WAR dependencies. */
1992 bool filter_flags
1993 = (slots_to_fill == 1
1994 && targetm.flags_regnum != INVALID_REGNUM
1995 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
1996 struct resources fset;
1997 CLEAR_RESOURCE (&needed);
1998 CLEAR_RESOURCE (&set);
1999 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2000 if (filter_flags)
2002 CLEAR_RESOURCE (&fset);
2003 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2005 mark_referenced_resources (insn, &needed, false);
2007 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2008 trial = next_trial)
2010 next_trial = prev_nonnote_insn (trial);
2012 /* This must be an INSN or CALL_INSN. */
2013 pat = PATTERN (trial);
2015 /* Stand-alone USE and CLOBBER are just for flow. */
2016 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2017 continue;
2019 /* Check for resource conflict first, to avoid unnecessary
2020 splitting. */
2021 if (! insn_references_resource_p (trial, &set, true)
2022 && ! insn_sets_resource_p (trial,
2023 filter_flags ? &fset : &set,
2024 true)
2025 && ! insn_sets_resource_p (trial, &needed, true)
2026 /* Can't separate set of cc0 from its use. */
2027 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2028 && ! can_throw_internal (trial))
2030 trial = try_split (pat, trial, 1);
2031 next_trial = prev_nonnote_insn (trial);
2032 if (eligible_for_delay (insn, slots_filled, trial, flags))
2034 /* In this case, we are searching backward, so if we
2035 find insns to put on the delay list, we want
2036 to put them at the head, rather than the
2037 tail, of the list. */
2039 update_reg_dead_notes (trial, insn);
2040 delay_list.safe_insert (0, trial);
2041 update_block (trial, trial);
2042 delete_related_insns (trial);
2043 if (slots_to_fill == ++slots_filled)
2044 break;
2045 continue;
2049 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2050 if (filter_flags)
2052 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2053 /* If the flags register is set, then it doesn't create RAW
2054 dependencies any longer and it also doesn't create WAW
2055 dependencies since it's dead after the original insn. */
2056 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2058 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2059 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2062 mark_referenced_resources (trial, &needed, true);
2066 /* If all needed slots haven't been filled, we come here. */
2068 /* Try to optimize case of jumping around a single insn. */
2069 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2070 && slots_filled != slots_to_fill
2071 && delay_list.is_empty ()
2072 && JUMP_P (insn)
2073 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2074 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2076 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2077 if (!delay_list.is_empty ())
2078 slots_filled += 1;
2081 /* Try to get insns from beyond the insn needing the delay slot.
2082 These insns can neither set or reference resources set in insns being
2083 skipped, cannot set resources in the insn being skipped, and, if this
2084 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2085 call might not return).
2087 There used to be code which continued past the target label if
2088 we saw all uses of the target label. This code did not work,
2089 because it failed to account for some instructions which were
2090 both annulled and marked as from the target. This can happen as a
2091 result of optimize_skip. Since this code was redundant with
2092 fill_eager_delay_slots anyways, it was just deleted. */
2094 if (slots_filled != slots_to_fill
2095 /* If this instruction could throw an exception which is
2096 caught in the same function, then it's not safe to fill
2097 the delay slot with an instruction from beyond this
2098 point. For example, consider:
2100 int i = 2;
2102 try {
2103 f();
2104 i = 3;
2105 } catch (...) {}
2107 return i;
2109 Even though `i' is a local variable, we must be sure not
2110 to put `i = 3' in the delay slot if `f' might throw an
2111 exception.
2113 Presumably, we should also check to see if we could get
2114 back to this function via `setjmp'. */
2115 && ! can_throw_internal (insn)
2116 && !JUMP_P (insn))
2118 int maybe_never = 0;
2119 rtx pat, trial_delay;
2121 CLEAR_RESOURCE (&needed);
2122 CLEAR_RESOURCE (&set);
2123 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2124 mark_referenced_resources (insn, &needed, true);
2126 if (CALL_P (insn))
2127 maybe_never = 1;
2129 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2130 trial = next_trial)
2132 next_trial = next_nonnote_insn (trial);
2134 /* This must be an INSN or CALL_INSN. */
2135 pat = PATTERN (trial);
2137 /* Stand-alone USE and CLOBBER are just for flow. */
2138 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2139 continue;
2141 /* If this already has filled delay slots, get the insn needing
2142 the delay slots. */
2143 if (GET_CODE (pat) == SEQUENCE)
2144 trial_delay = XVECEXP (pat, 0, 0);
2145 else
2146 trial_delay = trial;
2148 /* Stop our search when seeing a jump. */
2149 if (JUMP_P (trial_delay))
2150 break;
2152 /* See if we have a resource problem before we try to split. */
2153 if (GET_CODE (pat) != SEQUENCE
2154 && ! insn_references_resource_p (trial, &set, true)
2155 && ! insn_sets_resource_p (trial, &set, true)
2156 && ! insn_sets_resource_p (trial, &needed, true)
2157 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2158 && ! (maybe_never && may_trap_or_fault_p (pat))
2159 && (trial = try_split (pat, trial, 0))
2160 && eligible_for_delay (insn, slots_filled, trial, flags)
2161 && ! can_throw_internal (trial))
2163 next_trial = next_nonnote_insn (trial);
2164 add_to_delay_list (trial, &delay_list);
2165 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2166 link_cc0_insns (trial);
2168 delete_related_insns (trial);
2169 if (slots_to_fill == ++slots_filled)
2170 break;
2171 continue;
2174 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2175 mark_referenced_resources (trial, &needed, true);
2177 /* Ensure we don't put insns between the setting of cc and the
2178 comparison by moving a setting of cc into an earlier delay
2179 slot since these insns could clobber the condition code. */
2180 set.cc = 1;
2182 /* If this is a call, we might not get here. */
2183 if (CALL_P (trial_delay))
2184 maybe_never = 1;
2187 /* If there are slots left to fill and our search was stopped by an
2188 unconditional branch, try the insn at the branch target. We can
2189 redirect the branch if it works.
2191 Don't do this if the insn at the branch target is a branch. */
2192 if (slots_to_fill != slots_filled
2193 && trial
2194 && jump_to_label_p (trial)
2195 && simplejump_p (trial)
2196 && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2197 && ! (NONJUMP_INSN_P (next_trial)
2198 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2199 && !JUMP_P (next_trial)
2200 && ! insn_references_resource_p (next_trial, &set, true)
2201 && ! insn_sets_resource_p (next_trial, &set, true)
2202 && ! insn_sets_resource_p (next_trial, &needed, true)
2203 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2204 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2205 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2206 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2207 && ! can_throw_internal (trial))
2209 /* See comment in relax_delay_slots about necessity of using
2210 next_real_insn here. */
2211 rtx_insn *new_label = next_real_insn (next_trial);
2213 if (new_label != 0)
2214 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2215 else
2216 new_label = find_end_label (simple_return_rtx);
2218 if (new_label)
2220 add_to_delay_list (copy_delay_slot_insn (next_trial),
2221 &delay_list);
2222 slots_filled++;
2223 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2224 new_label);
2229 /* If this is an unconditional jump, then try to get insns from the
2230 target of the jump. */
2231 rtx_jump_insn *jump_insn;
2232 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2233 && simplejump_p (jump_insn)
2234 && slots_filled != slots_to_fill)
2235 fill_slots_from_thread (jump_insn, const_true_rtx,
2236 next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2237 NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2238 JUMP_LABEL (insn), 0),
2239 slots_to_fill, &slots_filled, &delay_list);
2241 if (!delay_list.is_empty ())
2242 unfilled_slots_base[i]
2243 = emit_delay_sequence (insn, delay_list, slots_filled);
2245 if (slots_to_fill == slots_filled)
2246 unfilled_slots_base[i] = 0;
2248 note_delay_statistics (slots_filled, 0);
2252 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2253 return the ultimate label reached by any such chain of jumps.
2254 Return a suitable return rtx if the chain ultimately leads to a
2255 return instruction.
2256 If LABEL is not followed by a jump, return LABEL.
2257 If the chain loops or we can't find end, return LABEL,
2258 since that tells caller to avoid changing the insn.
2259 If the returned label is obtained by following a crossing jump,
2260 set *CROSSING to true, otherwise set it to false. */
2262 static rtx
2263 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2265 rtx_insn *insn;
2266 rtx_insn *next;
2267 int depth;
2269 *crossing = false;
2270 if (ANY_RETURN_P (label))
2271 return label;
2273 rtx_insn *value = as_a <rtx_insn *> (label);
2275 for (depth = 0;
2276 (depth < 10
2277 && (insn = next_active_insn (value)) != 0
2278 && JUMP_P (insn)
2279 && JUMP_LABEL (insn) != NULL_RTX
2280 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2281 || ANY_RETURN_P (PATTERN (insn)))
2282 && (next = NEXT_INSN (insn))
2283 && BARRIER_P (next));
2284 depth++)
2286 rtx this_label_or_return = JUMP_LABEL (insn);
2288 /* If we have found a cycle, make the insn jump to itself. */
2289 if (this_label_or_return == label)
2290 return label;
2292 /* Cannot follow returns and cannot look through tablejumps. */
2293 if (ANY_RETURN_P (this_label_or_return))
2294 return this_label_or_return;
2296 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2297 if (NEXT_INSN (this_label)
2298 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2299 break;
2301 if (!targetm.can_follow_jump (jump, insn))
2302 break;
2303 if (!*crossing)
2304 *crossing = CROSSING_JUMP_P (jump);
2305 value = this_label;
2307 if (depth == 10)
2308 return label;
2309 return value;
2312 /* Try to find insns to place in delay slots.
2314 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2315 or is an unconditional branch if CONDITION is const_true_rtx.
2316 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2318 THREAD is a flow-of-control, either the insns to be executed if the
2319 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2321 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2322 to see if any potential delay slot insns set things needed there.
2324 LIKELY is nonzero if it is extremely likely that the branch will be
2325 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2326 end of a loop back up to the top.
2328 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2329 thread. I.e., it is the fallthrough code of our jump or the target of the
2330 jump when we are the only jump going there.
2332 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2333 case, we can only take insns from the head of the thread for our delay
2334 slot. We then adjust the jump to point after the insns we have taken. */
2336 static void
2337 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2338 rtx thread_or_return, rtx opposite_thread, int likely,
2339 int thread_if_true, int own_thread, int slots_to_fill,
2340 int *pslots_filled, vec<rtx_insn *> *delay_list)
2342 rtx new_thread;
2343 struct resources opposite_needed, set, needed;
2344 rtx_insn *trial;
2345 int lose = 0;
2346 int must_annul = 0;
2347 int flags;
2349 /* Validate our arguments. */
2350 gcc_assert (condition != const_true_rtx || thread_if_true);
2351 gcc_assert (own_thread || thread_if_true);
2353 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2355 /* If our thread is the end of subroutine, we can't get any delay
2356 insns from that. */
2357 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2358 return;
2360 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2362 /* If this is an unconditional branch, nothing is needed at the
2363 opposite thread. Otherwise, compute what is needed there. */
2364 if (condition == const_true_rtx)
2365 CLEAR_RESOURCE (&opposite_needed);
2366 else
2367 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2369 /* If the insn at THREAD can be split, do it here to avoid having to
2370 update THREAD and NEW_THREAD if it is done in the loop below. Also
2371 initialize NEW_THREAD. */
2373 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2375 /* Scan insns at THREAD. We are looking for an insn that can be removed
2376 from THREAD (it neither sets nor references resources that were set
2377 ahead of it and it doesn't set anything needs by the insns ahead of
2378 it) and that either can be placed in an annulling insn or aren't
2379 needed at OPPOSITE_THREAD. */
2381 CLEAR_RESOURCE (&needed);
2382 CLEAR_RESOURCE (&set);
2384 /* If we do not own this thread, we must stop as soon as we find
2385 something that we can't put in a delay slot, since all we can do
2386 is branch into THREAD at a later point. Therefore, labels stop
2387 the search if this is not the `true' thread. */
2389 for (trial = thread;
2390 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2391 trial = next_nonnote_insn (trial))
2393 rtx pat, old_trial;
2395 /* If we have passed a label, we no longer own this thread. */
2396 if (LABEL_P (trial))
2398 own_thread = 0;
2399 continue;
2402 pat = PATTERN (trial);
2403 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2404 continue;
2406 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2407 don't separate or copy insns that set and use CC0. */
2408 if (! insn_references_resource_p (trial, &set, true)
2409 && ! insn_sets_resource_p (trial, &set, true)
2410 && ! insn_sets_resource_p (trial, &needed, true)
2411 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2412 && (! own_thread || ! sets_cc0_p (pat)))))
2413 && ! can_throw_internal (trial))
2415 rtx_insn *prior_insn;
2417 /* If TRIAL is redundant with some insn before INSN, we don't
2418 actually need to add it to the delay list; we can merely pretend
2419 we did. */
2420 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2422 fix_reg_dead_note (prior_insn, insn);
2423 if (own_thread)
2425 update_block (trial, thread);
2426 if (trial == thread)
2428 thread = next_active_insn (thread);
2429 if (new_thread == trial)
2430 new_thread = thread;
2433 delete_related_insns (trial);
2435 else
2437 update_reg_unused_notes (prior_insn, trial);
2438 new_thread = next_active_insn (trial);
2441 continue;
2444 /* There are two ways we can win: If TRIAL doesn't set anything
2445 needed at the opposite thread and can't trap, or if it can
2446 go into an annulled delay slot. But we want neither to copy
2447 nor to speculate frame-related insns. */
2448 if (!must_annul
2449 && ((condition == const_true_rtx
2450 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2451 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2452 && ! may_trap_or_fault_p (pat)
2453 && ! RTX_FRAME_RELATED_P (trial))))
2455 old_trial = trial;
2456 trial = try_split (pat, trial, 0);
2457 if (new_thread == old_trial)
2458 new_thread = trial;
2459 if (thread == old_trial)
2460 thread = trial;
2461 pat = PATTERN (trial);
2462 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2463 goto winner;
2465 else if (!RTX_FRAME_RELATED_P (trial)
2466 && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2467 || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2469 old_trial = trial;
2470 trial = try_split (pat, trial, 0);
2471 if (new_thread == old_trial)
2472 new_thread = trial;
2473 if (thread == old_trial)
2474 thread = trial;
2475 pat = PATTERN (trial);
2476 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2477 ? check_annul_list_true_false (0, *delay_list)
2478 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2479 : check_annul_list_true_false (1, *delay_list)
2480 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2482 rtx_insn *temp;
2484 must_annul = 1;
2485 winner:
2487 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2488 link_cc0_insns (trial);
2490 /* If we own this thread, delete the insn. If this is the
2491 destination of a branch, show that a basic block status
2492 may have been updated. In any case, mark the new
2493 starting point of this thread. */
2494 if (own_thread)
2496 rtx note;
2498 update_block (trial, thread);
2499 if (trial == thread)
2501 thread = next_active_insn (thread);
2502 if (new_thread == trial)
2503 new_thread = thread;
2506 /* We are moving this insn, not deleting it. We must
2507 temporarily increment the use count on any referenced
2508 label lest it be deleted by delete_related_insns. */
2509 for (note = REG_NOTES (trial);
2510 note != NULL_RTX;
2511 note = XEXP (note, 1))
2512 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2513 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2515 /* REG_LABEL_OPERAND could be
2516 NOTE_INSN_DELETED_LABEL too. */
2517 if (LABEL_P (XEXP (note, 0)))
2518 LABEL_NUSES (XEXP (note, 0))++;
2519 else
2520 gcc_assert (REG_NOTE_KIND (note)
2521 == REG_LABEL_OPERAND);
2523 if (jump_to_label_p (trial))
2524 LABEL_NUSES (JUMP_LABEL (trial))++;
2526 delete_related_insns (trial);
2528 for (note = REG_NOTES (trial);
2529 note != NULL_RTX;
2530 note = XEXP (note, 1))
2531 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2532 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2534 /* REG_LABEL_OPERAND could be
2535 NOTE_INSN_DELETED_LABEL too. */
2536 if (LABEL_P (XEXP (note, 0)))
2537 LABEL_NUSES (XEXP (note, 0))--;
2538 else
2539 gcc_assert (REG_NOTE_KIND (note)
2540 == REG_LABEL_OPERAND);
2542 if (jump_to_label_p (trial))
2543 LABEL_NUSES (JUMP_LABEL (trial))--;
2545 else
2546 new_thread = next_active_insn (trial);
2548 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2549 if (thread_if_true)
2550 INSN_FROM_TARGET_P (temp) = 1;
2552 add_to_delay_list (temp, delay_list);
2554 if (slots_to_fill == ++(*pslots_filled))
2556 /* Even though we have filled all the slots, we
2557 may be branching to a location that has a
2558 redundant insn. Skip any if so. */
2559 while (new_thread && ! own_thread
2560 && ! insn_sets_resource_p (new_thread, &set, true)
2561 && ! insn_sets_resource_p (new_thread, &needed,
2562 true)
2563 && ! insn_references_resource_p (new_thread,
2564 &set, true)
2565 && (prior_insn
2566 = redundant_insn (new_thread, insn,
2567 *delay_list)))
2569 /* We know we do not own the thread, so no need
2570 to call update_block and delete_insn. */
2571 fix_reg_dead_note (prior_insn, insn);
2572 update_reg_unused_notes (prior_insn, new_thread);
2573 new_thread
2574 = next_active_insn (as_a<rtx_insn *> (new_thread));
2576 break;
2579 continue;
2584 /* This insn can't go into a delay slot. */
2585 lose = 1;
2586 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2587 mark_referenced_resources (trial, &needed, true);
2589 /* Ensure we don't put insns between the setting of cc and the comparison
2590 by moving a setting of cc into an earlier delay slot since these insns
2591 could clobber the condition code. */
2592 set.cc = 1;
2594 /* If this insn is a register-register copy and the next insn has
2595 a use of our destination, change it to use our source. That way,
2596 it will become a candidate for our delay slot the next time
2597 through this loop. This case occurs commonly in loops that
2598 scan a list.
2600 We could check for more complex cases than those tested below,
2601 but it doesn't seem worth it. It might also be a good idea to try
2602 to swap the two insns. That might do better.
2604 We can't do this if the next insn modifies our destination, because
2605 that would make the replacement into the insn invalid. We also can't
2606 do this if it modifies our source, because it might be an earlyclobber
2607 operand. This latter test also prevents updating the contents of
2608 a PRE_INC. We also can't do this if there's overlap of source and
2609 destination. Overlap may happen for larger-than-register-size modes. */
2611 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2612 && REG_P (SET_SRC (pat))
2613 && REG_P (SET_DEST (pat))
2614 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2616 rtx_insn *next = next_nonnote_insn (trial);
2618 if (next && NONJUMP_INSN_P (next)
2619 && GET_CODE (PATTERN (next)) != USE
2620 && ! reg_set_p (SET_DEST (pat), next)
2621 && ! reg_set_p (SET_SRC (pat), next)
2622 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2623 && ! modified_in_p (SET_DEST (pat), next))
2624 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2628 /* If we stopped on a branch insn that has delay slots, see if we can
2629 steal some of the insns in those slots. */
2630 if (trial && NONJUMP_INSN_P (trial)
2631 && GET_CODE (PATTERN (trial)) == SEQUENCE
2632 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2634 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2635 /* If this is the `true' thread, we will want to follow the jump,
2636 so we can only do this if we have taken everything up to here. */
2637 if (thread_if_true && trial == new_thread)
2639 steal_delay_list_from_target (insn, condition, sequence,
2640 delay_list, &set, &needed,
2641 &opposite_needed, slots_to_fill,
2642 pslots_filled, &must_annul,
2643 &new_thread);
2644 /* If we owned the thread and are told that it branched
2645 elsewhere, make sure we own the thread at the new location. */
2646 if (own_thread && trial != new_thread)
2647 own_thread = own_thread_p (new_thread, new_thread, 0);
2649 else if (! thread_if_true)
2650 steal_delay_list_from_fallthrough (insn, condition, sequence,
2651 delay_list, &set, &needed,
2652 &opposite_needed, slots_to_fill,
2653 pslots_filled, &must_annul);
2656 /* If we haven't found anything for this delay slot and it is very
2657 likely that the branch will be taken, see if the insn at our target
2658 increments or decrements a register with an increment that does not
2659 depend on the destination register. If so, try to place the opposite
2660 arithmetic insn after the jump insn and put the arithmetic insn in the
2661 delay slot. If we can't do this, return. */
2662 if (delay_list->is_empty () && likely
2663 && new_thread && !ANY_RETURN_P (new_thread)
2664 && NONJUMP_INSN_P (new_thread)
2665 && !RTX_FRAME_RELATED_P (new_thread)
2666 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2667 && asm_noperands (PATTERN (new_thread)) < 0)
2669 rtx pat = PATTERN (new_thread);
2670 rtx dest;
2671 rtx src;
2673 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2674 above. */
2675 trial = as_a <rtx_insn *> (new_thread);
2676 pat = PATTERN (trial);
2678 if (!NONJUMP_INSN_P (trial)
2679 || GET_CODE (pat) != SET
2680 || ! eligible_for_delay (insn, 0, trial, flags)
2681 || can_throw_internal (trial))
2682 return;
2684 dest = SET_DEST (pat), src = SET_SRC (pat);
2685 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2686 && rtx_equal_p (XEXP (src, 0), dest)
2687 && (!FLOAT_MODE_P (GET_MODE (src))
2688 || flag_unsafe_math_optimizations)
2689 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2690 && ! side_effects_p (pat))
2692 rtx other = XEXP (src, 1);
2693 rtx new_arith;
2694 rtx_insn *ninsn;
2696 /* If this is a constant adjustment, use the same code with
2697 the negated constant. Otherwise, reverse the sense of the
2698 arithmetic. */
2699 if (CONST_INT_P (other))
2700 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2701 negate_rtx (GET_MODE (src), other));
2702 else
2703 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2704 GET_MODE (src), dest, other);
2706 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2708 if (recog_memoized (ninsn) < 0
2709 || (extract_insn (ninsn),
2710 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2712 delete_related_insns (ninsn);
2713 return;
2716 if (own_thread)
2718 update_block (trial, thread);
2719 if (trial == thread)
2721 thread = next_active_insn (thread);
2722 if (new_thread == trial)
2723 new_thread = thread;
2725 delete_related_insns (trial);
2727 else
2728 new_thread = next_active_insn (trial);
2730 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2731 if (thread_if_true)
2732 INSN_FROM_TARGET_P (ninsn) = 1;
2734 add_to_delay_list (ninsn, delay_list);
2735 (*pslots_filled)++;
2739 if (!delay_list->is_empty () && must_annul)
2740 INSN_ANNULLED_BRANCH_P (insn) = 1;
2742 /* If we are to branch into the middle of this thread, find an appropriate
2743 label or make a new one if none, and redirect INSN to it. If we hit the
2744 end of the function, use the end-of-function label. */
2745 if (new_thread != thread)
2747 rtx label;
2748 bool crossing = false;
2750 gcc_assert (thread_if_true);
2752 if (new_thread && simplejump_or_return_p (new_thread)
2753 && redirect_with_delay_list_safe_p (insn,
2754 JUMP_LABEL (new_thread),
2755 *delay_list))
2756 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2757 &crossing);
2759 if (ANY_RETURN_P (new_thread))
2760 label = find_end_label (new_thread);
2761 else if (LABEL_P (new_thread))
2762 label = new_thread;
2763 else
2764 label = get_label_before (as_a <rtx_insn *> (new_thread),
2765 JUMP_LABEL (insn));
2767 if (label)
2769 reorg_redirect_jump (insn, label);
2770 if (crossing)
2771 CROSSING_JUMP_P (insn) = 1;
2776 /* Make another attempt to find insns to place in delay slots.
2778 We previously looked for insns located in front of the delay insn
2779 and, for non-jump delay insns, located behind the delay insn.
2781 Here only try to schedule jump insns and try to move insns from either
2782 the target or the following insns into the delay slot. If annulling is
2783 supported, we will be likely to do this. Otherwise, we can do this only
2784 if safe. */
2786 static void
2787 fill_eager_delay_slots (void)
2789 rtx_insn *insn;
2790 int i;
2791 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2793 for (i = 0; i < num_unfilled_slots; i++)
2795 rtx condition;
2796 rtx target_label, insn_at_target;
2797 rtx_insn *fallthrough_insn;
2798 auto_vec<rtx_insn *, 5> delay_list;
2799 rtx_jump_insn *jump_insn;
2800 int own_target;
2801 int own_fallthrough;
2802 int prediction, slots_to_fill, slots_filled;
2804 insn = unfilled_slots_base[i];
2805 if (insn == 0
2806 || insn->deleted ()
2807 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2808 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2809 continue;
2811 slots_to_fill = num_delay_slots (jump_insn);
2812 /* Some machine description have defined instructions to have
2813 delay slots only in certain circumstances which may depend on
2814 nearby insns (which change due to reorg's actions).
2816 For example, the PA port normally has delay slots for unconditional
2817 jumps.
2819 However, the PA port claims such jumps do not have a delay slot
2820 if they are immediate successors of certain CALL_INSNs. This
2821 allows the port to favor filling the delay slot of the call with
2822 the unconditional jump. */
2823 if (slots_to_fill == 0)
2824 continue;
2826 slots_filled = 0;
2827 target_label = JUMP_LABEL (jump_insn);
2828 condition = get_branch_condition (jump_insn, target_label);
2830 if (condition == 0)
2831 continue;
2833 /* Get the next active fallthrough and target insns and see if we own
2834 them. Then see whether the branch is likely true. We don't need
2835 to do a lot of this for unconditional branches. */
2837 insn_at_target = first_active_target_insn (target_label);
2838 own_target = own_thread_p (target_label, target_label, 0);
2840 if (condition == const_true_rtx)
2842 own_fallthrough = 0;
2843 fallthrough_insn = 0;
2844 prediction = 2;
2846 else
2848 fallthrough_insn = next_active_insn (jump_insn);
2849 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2850 prediction = mostly_true_jump (jump_insn);
2853 /* If this insn is expected to branch, first try to get insns from our
2854 target, then our fallthrough insns. If it is not expected to branch,
2855 try the other order. */
2857 if (prediction > 0)
2859 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2860 fallthrough_insn, prediction == 2, 1,
2861 own_target,
2862 slots_to_fill, &slots_filled, &delay_list);
2864 if (delay_list.is_empty () && own_fallthrough)
2866 /* Even though we didn't find anything for delay slots,
2867 we might have found a redundant insn which we deleted
2868 from the thread that was filled. So we have to recompute
2869 the next insn at the target. */
2870 target_label = JUMP_LABEL (jump_insn);
2871 insn_at_target = first_active_target_insn (target_label);
2873 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2874 insn_at_target, 0, 0, own_fallthrough,
2875 slots_to_fill, &slots_filled,
2876 &delay_list);
2879 else
2881 if (own_fallthrough)
2882 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2883 insn_at_target, 0, 0, own_fallthrough,
2884 slots_to_fill, &slots_filled, &delay_list);
2886 if (delay_list.is_empty ())
2887 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2888 next_active_insn (insn), 0, 1, own_target,
2889 slots_to_fill, &slots_filled, &delay_list);
2892 if (!delay_list.is_empty ())
2893 unfilled_slots_base[i]
2894 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2896 if (slots_to_fill == slots_filled)
2897 unfilled_slots_base[i] = 0;
2899 note_delay_statistics (slots_filled, 1);
2903 static void delete_computation (rtx_insn *insn);
2905 /* Recursively delete prior insns that compute the value (used only by INSN
2906 which the caller is deleting) stored in the register mentioned by NOTE
2907 which is a REG_DEAD note associated with INSN. */
2909 static void
2910 delete_prior_computation (rtx note, rtx_insn *insn)
2912 rtx_insn *our_prev;
2913 rtx reg = XEXP (note, 0);
2915 for (our_prev = prev_nonnote_insn (insn);
2916 our_prev && (NONJUMP_INSN_P (our_prev)
2917 || CALL_P (our_prev));
2918 our_prev = prev_nonnote_insn (our_prev))
2920 rtx pat = PATTERN (our_prev);
2922 /* If we reach a CALL which is not calling a const function
2923 or the callee pops the arguments, then give up. */
2924 if (CALL_P (our_prev)
2925 && (! RTL_CONST_CALL_P (our_prev)
2926 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2927 break;
2929 /* If we reach a SEQUENCE, it is too complex to try to
2930 do anything with it, so give up. We can be run during
2931 and after reorg, so SEQUENCE rtl can legitimately show
2932 up here. */
2933 if (GET_CODE (pat) == SEQUENCE)
2934 break;
2936 if (GET_CODE (pat) == USE
2937 && NONJUMP_INSN_P (XEXP (pat, 0)))
2938 /* reorg creates USEs that look like this. We leave them
2939 alone because reorg needs them for its own purposes. */
2940 break;
2942 if (reg_set_p (reg, pat))
2944 if (side_effects_p (pat) && !CALL_P (our_prev))
2945 break;
2947 if (GET_CODE (pat) == PARALLEL)
2949 /* If we find a SET of something else, we can't
2950 delete the insn. */
2952 int i;
2954 for (i = 0; i < XVECLEN (pat, 0); i++)
2956 rtx part = XVECEXP (pat, 0, i);
2958 if (GET_CODE (part) == SET
2959 && SET_DEST (part) != reg)
2960 break;
2963 if (i == XVECLEN (pat, 0))
2964 delete_computation (our_prev);
2966 else if (GET_CODE (pat) == SET
2967 && REG_P (SET_DEST (pat)))
2969 int dest_regno = REGNO (SET_DEST (pat));
2970 int dest_endregno = END_REGNO (SET_DEST (pat));
2971 int regno = REGNO (reg);
2972 int endregno = END_REGNO (reg);
2974 if (dest_regno >= regno
2975 && dest_endregno <= endregno)
2976 delete_computation (our_prev);
2978 /* We may have a multi-word hard register and some, but not
2979 all, of the words of the register are needed in subsequent
2980 insns. Write REG_UNUSED notes for those parts that were not
2981 needed. */
2982 else if (dest_regno <= regno
2983 && dest_endregno >= endregno)
2985 int i;
2987 add_reg_note (our_prev, REG_UNUSED, reg);
2989 for (i = dest_regno; i < dest_endregno; i++)
2990 if (! find_regno_note (our_prev, REG_UNUSED, i))
2991 break;
2993 if (i == dest_endregno)
2994 delete_computation (our_prev);
2998 break;
3001 /* If PAT references the register that dies here, it is an
3002 additional use. Hence any prior SET isn't dead. However, this
3003 insn becomes the new place for the REG_DEAD note. */
3004 if (reg_overlap_mentioned_p (reg, pat))
3006 XEXP (note, 1) = REG_NOTES (our_prev);
3007 REG_NOTES (our_prev) = note;
3008 break;
3013 /* Delete INSN and recursively delete insns that compute values used only
3014 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3016 Look at all our REG_DEAD notes. If a previous insn does nothing other
3017 than set a register that dies in this insn, we can delete that insn
3018 as well.
3020 On machines with CC0, if CC0 is used in this insn, we may be able to
3021 delete the insn that set it. */
3023 static void
3024 delete_computation (rtx_insn *insn)
3026 rtx note, next;
3028 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3030 rtx_insn *prev = prev_nonnote_insn (insn);
3031 /* We assume that at this stage
3032 CC's are always set explicitly
3033 and always immediately before the jump that
3034 will use them. So if the previous insn
3035 exists to set the CC's, delete it
3036 (unless it performs auto-increments, etc.). */
3037 if (prev && NONJUMP_INSN_P (prev)
3038 && sets_cc0_p (PATTERN (prev)))
3040 if (sets_cc0_p (PATTERN (prev)) > 0
3041 && ! side_effects_p (PATTERN (prev)))
3042 delete_computation (prev);
3043 else
3044 /* Otherwise, show that cc0 won't be used. */
3045 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3049 for (note = REG_NOTES (insn); note; note = next)
3051 next = XEXP (note, 1);
3053 if (REG_NOTE_KIND (note) != REG_DEAD
3054 /* Verify that the REG_NOTE is legitimate. */
3055 || !REG_P (XEXP (note, 0)))
3056 continue;
3058 delete_prior_computation (note, insn);
3061 delete_related_insns (insn);
3064 /* If all INSN does is set the pc, delete it,
3065 and delete the insn that set the condition codes for it
3066 if that's what the previous thing was. */
3068 static void
3069 delete_jump (rtx_insn *insn)
3071 rtx set = single_set (insn);
3073 if (set && GET_CODE (SET_DEST (set)) == PC)
3074 delete_computation (insn);
3077 static rtx_insn *
3078 label_before_next_insn (rtx_insn *x, rtx scan_limit)
3080 rtx_insn *insn = next_active_insn (x);
3081 while (insn)
3083 insn = PREV_INSN (insn);
3084 if (insn == scan_limit || insn == NULL_RTX)
3085 return NULL;
3086 if (LABEL_P (insn))
3087 break;
3089 return insn;
3092 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3093 BEG and END. */
3095 static bool
3096 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3098 const rtx_insn *p;
3099 for (p = beg; p != end; p = NEXT_INSN (p))
3100 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3101 return true;
3102 return false;
3106 /* Once we have tried two ways to fill a delay slot, make a pass over the
3107 code to try to improve the results and to do such things as more jump
3108 threading. */
3110 static void
3111 relax_delay_slots (rtx_insn *first)
3113 rtx_insn *insn, *next;
3114 rtx_sequence *pat;
3115 rtx_insn *delay_insn;
3116 rtx target_label;
3118 /* Look at every JUMP_INSN and see if we can improve it. */
3119 for (insn = first; insn; insn = next)
3121 rtx_insn *other;
3122 bool crossing;
3124 next = next_active_insn (insn);
3126 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3127 the next insn, or jumps to a label that is not the last of a
3128 group of consecutive labels. */
3129 if (is_a <rtx_jump_insn *> (insn)
3130 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3131 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3133 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3134 target_label
3135 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3136 &crossing));
3137 if (ANY_RETURN_P (target_label))
3138 target_label = find_end_label (target_label);
3140 if (target_label
3141 && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3142 && ! condjump_in_parallel_p (jump_insn)
3143 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3145 delete_jump (jump_insn);
3146 continue;
3149 if (target_label && target_label != JUMP_LABEL (jump_insn))
3151 reorg_redirect_jump (jump_insn, target_label);
3152 if (crossing)
3153 CROSSING_JUMP_P (jump_insn) = 1;
3156 /* See if this jump conditionally branches around an unconditional
3157 jump. If so, invert this jump and point it to the target of the
3158 second jump. Check if it's possible on the target. */
3159 if (next && simplejump_or_return_p (next)
3160 && any_condjump_p (jump_insn)
3161 && target_label
3162 && (next_active_insn (as_a<rtx_insn *> (target_label))
3163 == next_active_insn (next))
3164 && no_labels_between_p (jump_insn, next)
3165 && targetm.can_follow_jump (jump_insn, next))
3167 rtx label = JUMP_LABEL (next);
3169 /* Be careful how we do this to avoid deleting code or
3170 labels that are momentarily dead. See similar optimization
3171 in jump.c.
3173 We also need to ensure we properly handle the case when
3174 invert_jump fails. */
3176 ++LABEL_NUSES (target_label);
3177 if (!ANY_RETURN_P (label))
3178 ++LABEL_NUSES (label);
3180 if (invert_jump (jump_insn, label, 1))
3182 delete_related_insns (next);
3183 next = jump_insn;
3186 if (!ANY_RETURN_P (label))
3187 --LABEL_NUSES (label);
3189 if (--LABEL_NUSES (target_label) == 0)
3190 delete_related_insns (target_label);
3192 continue;
3196 /* If this is an unconditional jump and the previous insn is a
3197 conditional jump, try reversing the condition of the previous
3198 insn and swapping our targets. The next pass might be able to
3199 fill the slots.
3201 Don't do this if we expect the conditional branch to be true, because
3202 we would then be making the more common case longer. */
3204 if (simplejump_or_return_p (insn)
3205 && (other = prev_active_insn (insn)) != 0
3206 && any_condjump_p (other)
3207 && no_labels_between_p (other, insn)
3208 && 0 > mostly_true_jump (other))
3210 rtx other_target = JUMP_LABEL (other);
3211 target_label = JUMP_LABEL (insn);
3213 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3214 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3217 /* Now look only at cases where we have a filled delay slot. */
3218 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3219 continue;
3221 pat = as_a <rtx_sequence *> (PATTERN (insn));
3222 delay_insn = pat->insn (0);
3224 /* See if the first insn in the delay slot is redundant with some
3225 previous insn. Remove it from the delay slot if so; then set up
3226 to reprocess this insn. */
3227 if (redundant_insn (pat->insn (1), delay_insn, vNULL))
3229 update_block (pat->insn (1), insn);
3230 delete_from_delay_slot (pat->insn (1));
3231 next = prev_active_insn (next);
3232 continue;
3235 /* See if we have a RETURN insn with a filled delay slot followed
3236 by a RETURN insn with an unfilled a delay slot. If so, we can delete
3237 the first RETURN (but not its delay insn). This gives the same
3238 effect in fewer instructions.
3240 Only do so if optimizing for size since this results in slower, but
3241 smaller code. */
3242 if (optimize_function_for_size_p (cfun)
3243 && ANY_RETURN_P (PATTERN (delay_insn))
3244 && next
3245 && JUMP_P (next)
3246 && PATTERN (next) == PATTERN (delay_insn))
3248 rtx_insn *after;
3249 int i;
3251 /* Delete the RETURN and just execute the delay list insns.
3253 We do this by deleting the INSN containing the SEQUENCE, then
3254 re-emitting the insns separately, and then deleting the RETURN.
3255 This allows the count of the jump target to be properly
3256 decremented.
3258 Note that we need to change the INSN_UID of the re-emitted insns
3259 since it is used to hash the insns for mark_target_live_regs and
3260 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3262 Clear the from target bit, since these insns are no longer
3263 in delay slots. */
3264 for (i = 0; i < XVECLEN (pat, 0); i++)
3265 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3267 rtx_insn *prev = PREV_INSN (insn);
3268 delete_related_insns (insn);
3269 gcc_assert (GET_CODE (pat) == SEQUENCE);
3270 add_insn_after (delay_insn, prev, NULL);
3271 after = delay_insn;
3272 for (i = 1; i < pat->len (); i++)
3273 after = emit_copy_of_insn_after (pat->insn (i), after);
3274 delete_scheduled_jump (delay_insn);
3275 continue;
3278 /* Now look only at the cases where we have a filled JUMP_INSN. */
3279 rtx_jump_insn *delay_jump_insn =
3280 dyn_cast <rtx_jump_insn *> (delay_insn);
3281 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3282 || condjump_in_parallel_p (delay_jump_insn)))
3283 continue;
3285 target_label = JUMP_LABEL (delay_jump_insn);
3286 if (target_label && ANY_RETURN_P (target_label))
3287 continue;
3289 /* If this jump goes to another unconditional jump, thread it, but
3290 don't convert a jump into a RETURN here. */
3291 rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3292 delay_jump_insn,
3293 &crossing));
3294 if (ANY_RETURN_P (trial))
3295 trial = find_end_label (trial);
3297 if (trial && trial != target_label
3298 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3300 reorg_redirect_jump (delay_jump_insn, trial);
3301 target_label = trial;
3302 if (crossing)
3303 CROSSING_JUMP_P (delay_jump_insn) = 1;
3306 /* If the first insn at TARGET_LABEL is redundant with a previous
3307 insn, redirect the jump to the following insn and process again.
3308 We use next_real_insn instead of next_active_insn so we
3309 don't skip USE-markers, or we'll end up with incorrect
3310 liveness info. */
3311 trial = next_real_insn (target_label);
3312 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3313 && redundant_insn (trial, insn, vNULL)
3314 && ! can_throw_internal (trial))
3316 /* Figure out where to emit the special USE insn so we don't
3317 later incorrectly compute register live/death info. */
3318 rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3319 if (tmp == 0)
3320 tmp = find_end_label (simple_return_rtx);
3322 if (tmp)
3324 /* Insert the special USE insn and update dataflow info.
3325 We know "trial" is an insn here as it is the output of
3326 next_real_insn () above. */
3327 update_block (as_a <rtx_insn *> (trial), tmp);
3329 /* Now emit a label before the special USE insn, and
3330 redirect our jump to the new label. */
3331 target_label = get_label_before (PREV_INSN (tmp), target_label);
3332 reorg_redirect_jump (delay_jump_insn, target_label);
3333 next = insn;
3334 continue;
3338 /* Similarly, if it is an unconditional jump with one insn in its
3339 delay list and that insn is redundant, thread the jump. */
3340 rtx_sequence *trial_seq =
3341 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3342 if (trial_seq
3343 && trial_seq->len () == 2
3344 && JUMP_P (trial_seq->insn (0))
3345 && simplejump_or_return_p (trial_seq->insn (0))
3346 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3348 rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3349 if (ANY_RETURN_P (temp_label))
3350 temp_label = find_end_label (temp_label);
3352 if (temp_label
3353 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3354 temp_label, insn))
3356 update_block (trial_seq->insn (1), insn);
3357 reorg_redirect_jump (delay_jump_insn, temp_label);
3358 next = insn;
3359 continue;
3363 /* See if we have a simple (conditional) jump that is useless. */
3364 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3365 && ! condjump_in_parallel_p (delay_jump_insn)
3366 && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3367 && ! BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3368 /* If the last insn in the delay slot sets CC0 for some insn,
3369 various code assumes that it is in a delay slot. We could
3370 put it back where it belonged and delete the register notes,
3371 but it doesn't seem worthwhile in this uncommon case. */
3372 && (!HAVE_cc0
3373 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3374 REG_CC_USER, NULL_RTX)))
3376 rtx_insn *after;
3377 int i;
3379 /* All this insn does is execute its delay list and jump to the
3380 following insn. So delete the jump and just execute the delay
3381 list insns.
3383 We do this by deleting the INSN containing the SEQUENCE, then
3384 re-emitting the insns separately, and then deleting the jump.
3385 This allows the count of the jump target to be properly
3386 decremented.
3388 Note that we need to change the INSN_UID of the re-emitted insns
3389 since it is used to hash the insns for mark_target_live_regs and
3390 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3392 Clear the from target bit, since these insns are no longer
3393 in delay slots. */
3394 for (i = 0; i < XVECLEN (pat, 0); i++)
3395 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3397 rtx_insn *prev = PREV_INSN (insn);
3398 delete_related_insns (insn);
3399 gcc_assert (GET_CODE (pat) == SEQUENCE);
3400 add_insn_after (delay_jump_insn, prev, NULL);
3401 after = delay_jump_insn;
3402 for (i = 1; i < pat->len (); i++)
3403 after = emit_copy_of_insn_after (pat->insn (i), after);
3404 delete_scheduled_jump (delay_jump_insn);
3405 continue;
3408 /* See if this is an unconditional jump around a single insn which is
3409 identical to the one in its delay slot. In this case, we can just
3410 delete the branch and the insn in its delay slot. */
3411 if (next && NONJUMP_INSN_P (next)
3412 && label_before_next_insn (next, insn) == target_label
3413 && simplejump_p (insn)
3414 && XVECLEN (pat, 0) == 2
3415 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3417 delete_related_insns (insn);
3418 continue;
3421 /* See if this jump (with its delay slots) conditionally branches
3422 around an unconditional jump (without delay slots). If so, invert
3423 this jump and point it to the target of the second jump. We cannot
3424 do this for annulled jumps, though. Again, don't convert a jump to
3425 a RETURN here. */
3426 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3427 && any_condjump_p (delay_jump_insn)
3428 && next && simplejump_or_return_p (next)
3429 && (next_active_insn (as_a<rtx_insn *> (target_label))
3430 == next_active_insn (next))
3431 && no_labels_between_p (insn, next))
3433 rtx label = JUMP_LABEL (next);
3434 rtx old_label = JUMP_LABEL (delay_jump_insn);
3436 if (ANY_RETURN_P (label))
3437 label = find_end_label (label);
3439 /* find_end_label can generate a new label. Check this first. */
3440 if (label
3441 && no_labels_between_p (insn, next)
3442 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3443 label, insn))
3445 /* Be careful how we do this to avoid deleting code or labels
3446 that are momentarily dead. See similar optimization in
3447 jump.c */
3448 if (old_label)
3449 ++LABEL_NUSES (old_label);
3451 if (invert_jump (delay_jump_insn, label, 1))
3453 int i;
3455 /* Must update the INSN_FROM_TARGET_P bits now that
3456 the branch is reversed, so that mark_target_live_regs
3457 will handle the delay slot insn correctly. */
3458 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3460 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3461 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3464 delete_related_insns (next);
3465 next = insn;
3468 if (old_label && --LABEL_NUSES (old_label) == 0)
3469 delete_related_insns (old_label);
3470 continue;
3474 /* If we own the thread opposite the way this insn branches, see if we
3475 can merge its delay slots with following insns. */
3476 if (INSN_FROM_TARGET_P (pat->insn (1))
3477 && own_thread_p (NEXT_INSN (insn), 0, 1))
3478 try_merge_delay_insns (insn, next);
3479 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3480 && own_thread_p (target_label, target_label, 0))
3481 try_merge_delay_insns (insn,
3482 next_active_insn (as_a<rtx_insn *> (target_label)));
3484 /* If we get here, we haven't deleted INSN. But we may have deleted
3485 NEXT, so recompute it. */
3486 next = next_active_insn (insn);
3491 /* Look for filled jumps to the end of function label. We can try to convert
3492 them into RETURN insns if the insns in the delay slot are valid for the
3493 RETURN as well. */
3495 static void
3496 make_return_insns (rtx_insn *first)
3498 rtx_insn *insn;
3499 rtx_jump_insn *jump_insn;
3500 rtx real_return_label = function_return_label;
3501 rtx real_simple_return_label = function_simple_return_label;
3502 int slots, i;
3504 /* See if there is a RETURN insn in the function other than the one we
3505 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3506 into a RETURN to jump to it. */
3507 for (insn = first; insn; insn = NEXT_INSN (insn))
3508 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3510 rtx t = get_label_before (insn, NULL_RTX);
3511 if (PATTERN (insn) == ret_rtx)
3512 real_return_label = t;
3513 else
3514 real_simple_return_label = t;
3515 break;
3518 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3519 was equal to END_OF_FUNCTION_LABEL. */
3520 if (real_return_label)
3521 LABEL_NUSES (real_return_label)++;
3522 if (real_simple_return_label)
3523 LABEL_NUSES (real_simple_return_label)++;
3525 /* Clear the list of insns to fill so we can use it. */
3526 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3528 for (insn = first; insn; insn = NEXT_INSN (insn))
3530 int flags;
3531 rtx kind, real_label;
3533 /* Only look at filled JUMP_INSNs that go to the end of function
3534 label. */
3535 if (!NONJUMP_INSN_P (insn))
3536 continue;
3538 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3539 continue;
3541 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3543 if (!jump_to_label_p (pat->insn (0)))
3544 continue;
3546 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3548 kind = ret_rtx;
3549 real_label = real_return_label;
3551 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3553 kind = simple_return_rtx;
3554 real_label = real_simple_return_label;
3556 else
3557 continue;
3559 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3561 /* If we can't make the jump into a RETURN, try to redirect it to the best
3562 RETURN and go on to the next insn. */
3563 if (!reorg_redirect_jump (jump_insn, kind))
3565 /* Make sure redirecting the jump will not invalidate the delay
3566 slot insns. */
3567 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3568 reorg_redirect_jump (jump_insn, real_label);
3569 continue;
3572 /* See if this RETURN can accept the insns current in its delay slot.
3573 It can if it has more or an equal number of slots and the contents
3574 of each is valid. */
3576 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3577 slots = num_delay_slots (jump_insn);
3578 if (slots >= XVECLEN (pat, 0) - 1)
3580 for (i = 1; i < XVECLEN (pat, 0); i++)
3581 if (! (
3582 #if ANNUL_IFFALSE_SLOTS
3583 (INSN_ANNULLED_BRANCH_P (jump_insn)
3584 && INSN_FROM_TARGET_P (pat->insn (i)))
3585 ? eligible_for_annul_false (jump_insn, i - 1,
3586 pat->insn (i), flags) :
3587 #endif
3588 #if ANNUL_IFTRUE_SLOTS
3589 (INSN_ANNULLED_BRANCH_P (jump_insn)
3590 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3591 ? eligible_for_annul_true (jump_insn, i - 1,
3592 pat->insn (i), flags) :
3593 #endif
3594 eligible_for_delay (jump_insn, i - 1,
3595 pat->insn (i), flags)))
3596 break;
3598 else
3599 i = 0;
3601 if (i == XVECLEN (pat, 0))
3602 continue;
3604 /* We have to do something with this insn. If it is an unconditional
3605 RETURN, delete the SEQUENCE and output the individual insns,
3606 followed by the RETURN. Then set things up so we try to find
3607 insns for its delay slots, if it needs some. */
3608 if (ANY_RETURN_P (PATTERN (jump_insn)))
3610 rtx_insn *prev = PREV_INSN (insn);
3612 delete_related_insns (insn);
3613 for (i = 1; i < XVECLEN (pat, 0); i++)
3614 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3616 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3617 emit_barrier_after (insn);
3619 if (slots)
3620 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3622 else
3623 /* It is probably more efficient to keep this with its current
3624 delay slot as a branch to a RETURN. */
3625 reorg_redirect_jump (jump_insn, real_label);
3628 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3629 new delay slots we have created. */
3630 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3631 delete_related_insns (real_return_label);
3632 if (real_simple_return_label != NULL_RTX
3633 && --LABEL_NUSES (real_simple_return_label) == 0)
3634 delete_related_insns (real_simple_return_label);
3636 fill_simple_delay_slots (1);
3637 fill_simple_delay_slots (0);
3640 /* Try to find insns to place in delay slots. */
3642 static void
3643 dbr_schedule (rtx_insn *first)
3645 rtx_insn *insn, *next, *epilogue_insn = 0;
3646 int i;
3647 bool need_return_insns;
3649 /* If the current function has no insns other than the prologue and
3650 epilogue, then do not try to fill any delay slots. */
3651 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3652 return;
3654 /* Find the highest INSN_UID and allocate and initialize our map from
3655 INSN_UID's to position in code. */
3656 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3658 if (INSN_UID (insn) > max_uid)
3659 max_uid = INSN_UID (insn);
3660 if (NOTE_P (insn)
3661 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3662 epilogue_insn = insn;
3665 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3666 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3667 uid_to_ruid[INSN_UID (insn)] = i;
3669 /* Initialize the list of insns that need filling. */
3670 if (unfilled_firstobj == 0)
3672 gcc_obstack_init (&unfilled_slots_obstack);
3673 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3676 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3678 rtx target;
3680 /* Skip vector tables. We can't get attributes for them. */
3681 if (JUMP_TABLE_DATA_P (insn))
3682 continue;
3684 if (JUMP_P (insn))
3685 INSN_ANNULLED_BRANCH_P (insn) = 0;
3686 INSN_FROM_TARGET_P (insn) = 0;
3688 if (num_delay_slots (insn) > 0)
3689 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3691 /* Ensure all jumps go to the last of a set of consecutive labels. */
3692 if (JUMP_P (insn)
3693 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3694 && !ANY_RETURN_P (JUMP_LABEL (insn))
3695 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3696 != JUMP_LABEL (insn)))
3697 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3700 init_resource_info (epilogue_insn);
3702 /* Show we haven't computed an end-of-function label yet. */
3703 function_return_label = function_simple_return_label = NULL;
3705 /* Initialize the statistics for this function. */
3706 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3707 memset (num_filled_delays, 0, sizeof num_filled_delays);
3709 /* Now do the delay slot filling. Try everything twice in case earlier
3710 changes make more slots fillable. */
3712 for (reorg_pass_number = 0;
3713 reorg_pass_number < MAX_REORG_PASSES;
3714 reorg_pass_number++)
3716 fill_simple_delay_slots (1);
3717 fill_simple_delay_slots (0);
3718 if (!targetm.no_speculation_in_delay_slots_p ())
3719 fill_eager_delay_slots ();
3720 relax_delay_slots (first);
3723 /* If we made an end of function label, indicate that it is now
3724 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3725 If it is now unused, delete it. */
3726 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3727 delete_related_insns (function_return_label);
3728 if (function_simple_return_label
3729 && --LABEL_NUSES (function_simple_return_label) == 0)
3730 delete_related_insns (function_simple_return_label);
3732 need_return_insns = false;
3733 need_return_insns |= targetm.have_return () && function_return_label != 0;
3734 need_return_insns |= (targetm.have_simple_return ()
3735 && function_simple_return_label != 0);
3736 if (need_return_insns)
3737 make_return_insns (first);
3739 /* Delete any USE insns made by update_block; subsequent passes don't need
3740 them or know how to deal with them. */
3741 for (insn = first; insn; insn = next)
3743 next = NEXT_INSN (insn);
3745 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3746 && INSN_P (XEXP (PATTERN (insn), 0)))
3747 next = delete_related_insns (insn);
3750 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3752 /* It is not clear why the line below is needed, but it does seem to be. */
3753 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3755 if (dump_file)
3757 int i, j, need_comma;
3758 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3759 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3761 for (reorg_pass_number = 0;
3762 reorg_pass_number < MAX_REORG_PASSES;
3763 reorg_pass_number++)
3765 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3766 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3768 need_comma = 0;
3769 fprintf (dump_file, ";; Reorg function #%d\n", i);
3771 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3772 num_insns_needing_delays[i][reorg_pass_number]);
3774 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3775 if (num_filled_delays[i][j][reorg_pass_number])
3777 if (need_comma)
3778 fprintf (dump_file, ", ");
3779 need_comma = 1;
3780 fprintf (dump_file, "%d got %d delays",
3781 num_filled_delays[i][j][reorg_pass_number], j);
3783 fprintf (dump_file, "\n");
3786 memset (total_delay_slots, 0, sizeof total_delay_slots);
3787 memset (total_annul_slots, 0, sizeof total_annul_slots);
3788 for (insn = first; insn; insn = NEXT_INSN (insn))
3790 if (! insn->deleted ()
3791 && NONJUMP_INSN_P (insn)
3792 && GET_CODE (PATTERN (insn)) != USE
3793 && GET_CODE (PATTERN (insn)) != CLOBBER)
3795 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3797 rtx control;
3798 j = XVECLEN (PATTERN (insn), 0) - 1;
3799 if (j > MAX_DELAY_HISTOGRAM)
3800 j = MAX_DELAY_HISTOGRAM;
3801 control = XVECEXP (PATTERN (insn), 0, 0);
3802 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3803 total_annul_slots[j]++;
3804 else
3805 total_delay_slots[j]++;
3807 else if (num_delay_slots (insn) > 0)
3808 total_delay_slots[0]++;
3811 fprintf (dump_file, ";; Reorg totals: ");
3812 need_comma = 0;
3813 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3815 if (total_delay_slots[j])
3817 if (need_comma)
3818 fprintf (dump_file, ", ");
3819 need_comma = 1;
3820 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3823 fprintf (dump_file, "\n");
3825 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3827 fprintf (dump_file, ";; Reorg annuls: ");
3828 need_comma = 0;
3829 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3831 if (total_annul_slots[j])
3833 if (need_comma)
3834 fprintf (dump_file, ", ");
3835 need_comma = 1;
3836 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3839 fprintf (dump_file, "\n");
3842 fprintf (dump_file, "\n");
3845 if (!sibling_labels.is_empty ())
3847 update_alignments (sibling_labels);
3848 sibling_labels.release ();
3851 free_resource_info ();
3852 free (uid_to_ruid);
3853 crtl->dbr_scheduled_p = true;
3856 /* Run delay slot optimization. */
3857 static unsigned int
3858 rest_of_handle_delay_slots (void)
3860 if (DELAY_SLOTS)
3861 dbr_schedule (get_insns ());
3863 return 0;
3866 namespace {
3868 const pass_data pass_data_delay_slots =
3870 RTL_PASS, /* type */
3871 "dbr", /* name */
3872 OPTGROUP_NONE, /* optinfo_flags */
3873 TV_DBR_SCHED, /* tv_id */
3874 0, /* properties_required */
3875 0, /* properties_provided */
3876 0, /* properties_destroyed */
3877 0, /* todo_flags_start */
3878 0, /* todo_flags_finish */
3881 class pass_delay_slots : public rtl_opt_pass
3883 public:
3884 pass_delay_slots (gcc::context *ctxt)
3885 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3888 /* opt_pass methods: */
3889 virtual bool gate (function *);
3890 virtual unsigned int execute (function *)
3892 return rest_of_handle_delay_slots ();
3895 }; // class pass_delay_slots
3897 bool
3898 pass_delay_slots::gate (function *)
3900 /* At -O0 dataflow info isn't updated after RA. */
3901 if (DELAY_SLOTS)
3902 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3904 return false;
3907 } // anon namespace
3909 rtl_opt_pass *
3910 make_pass_delay_slots (gcc::context *ctxt)
3912 return new pass_delay_slots (ctxt);
3915 /* Machine dependent reorg pass. */
3917 namespace {
3919 const pass_data pass_data_machine_reorg =
3921 RTL_PASS, /* type */
3922 "mach", /* name */
3923 OPTGROUP_NONE, /* optinfo_flags */
3924 TV_MACH_DEP, /* tv_id */
3925 0, /* properties_required */
3926 0, /* properties_provided */
3927 0, /* properties_destroyed */
3928 0, /* todo_flags_start */
3929 0, /* todo_flags_finish */
3932 class pass_machine_reorg : public rtl_opt_pass
3934 public:
3935 pass_machine_reorg (gcc::context *ctxt)
3936 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3939 /* opt_pass methods: */
3940 virtual bool gate (function *)
3942 return targetm.machine_dependent_reorg != 0;
3945 virtual unsigned int execute (function *)
3947 targetm.machine_dependent_reorg ();
3948 return 0;
3951 }; // class pass_machine_reorg
3953 } // anon namespace
3955 rtl_opt_pass *
3956 make_pass_machine_reorg (gcc::context *ctxt)
3958 return new pass_machine_reorg (ctxt);