Revert r244448
[official-gcc.git] / gcc / reorg.c
blob85ef7d6880cb34451422cd7db790fd27374b01f5
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
85 conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "target.h"
108 #include "rtl.h"
109 #include "tree.h"
110 #include "predict.h"
111 #include "memmodel.h"
112 #include "tm_p.h"
113 #include "expmed.h"
114 #include "insn-config.h"
115 #include "emit-rtl.h"
116 #include "recog.h"
117 #include "insn-attr.h"
118 #include "resource.h"
119 #include "params.h"
120 #include "tree-pass.h"
123 /* First, some functions that were used before GCC got a control flow graph.
124 These functions are now only used here in reorg.c, and have therefore
125 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
127 /* Return the last label to mark the same position as LABEL. Return LABEL
128 itself if it is null or any return rtx. */
130 static rtx
131 skip_consecutive_labels (rtx label_or_return)
133 rtx_insn *insn;
135 if (label_or_return && ANY_RETURN_P (label_or_return))
136 return label_or_return;
138 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
140 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
141 if (LABEL_P (insn))
142 label = insn;
144 return label;
147 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
148 and REG_CC_USER notes so we can find it. */
150 static void
151 link_cc0_insns (rtx_insn *insn)
153 rtx user = next_nonnote_insn (insn);
155 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
156 user = XVECEXP (PATTERN (user), 0, 0);
158 add_reg_note (user, REG_CC_SETTER, insn);
159 add_reg_note (insn, REG_CC_USER, user);
162 /* Insns which have delay slots that have not yet been filled. */
164 static struct obstack unfilled_slots_obstack;
165 static rtx *unfilled_firstobj;
167 /* Define macros to refer to the first and last slot containing unfilled
168 insns. These are used because the list may move and its address
169 should be recomputed at each use. */
171 #define unfilled_slots_base \
172 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
174 #define unfilled_slots_next \
175 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
177 /* Points to the label before the end of the function, or before a
178 return insn. */
179 static rtx_code_label *function_return_label;
180 /* Likewise for a simple_return. */
181 static rtx_code_label *function_simple_return_label;
183 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
184 not always monotonically increase. */
185 static int *uid_to_ruid;
187 /* Highest valid index in `uid_to_ruid'. */
188 static int max_uid;
190 static int stop_search_p (rtx_insn *, int);
191 static int resource_conflicts_p (struct resources *, struct resources *);
192 static int insn_references_resource_p (rtx, struct resources *, bool);
193 static int insn_sets_resource_p (rtx, struct resources *, bool);
194 static rtx_code_label *find_end_label (rtx);
195 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
196 int);
197 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
198 static rtx_insn *delete_from_delay_slot (rtx_insn *);
199 static void delete_scheduled_jump (rtx_insn *);
200 static void note_delay_statistics (int, int);
201 static int get_jump_flags (const rtx_insn *, rtx);
202 static int mostly_true_jump (rtx);
203 static rtx get_branch_condition (const rtx_insn *, rtx);
204 static int condition_dominates_p (rtx, const rtx_insn *);
205 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
206 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
207 const vec<rtx_insn *> &);
208 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
209 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
210 vec<rtx_insn *> *,
211 struct resources *,
212 struct resources *,
213 struct resources *,
214 int, int *, int *,
215 rtx *);
216 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
217 vec<rtx_insn *> *,
218 struct resources *,
219 struct resources *,
220 struct resources *,
221 int, int *, int *);
222 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
223 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
224 static int own_thread_p (rtx, rtx, int);
225 static void update_block (rtx_insn *, rtx_insn *);
226 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
227 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
228 static void fix_reg_dead_note (rtx_insn *, rtx);
229 static void update_reg_unused_notes (rtx_insn *, rtx);
230 static void fill_simple_delay_slots (int);
231 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
232 int, int, int, int,
233 int *, vec<rtx_insn *> *);
234 static void fill_eager_delay_slots (void);
235 static void relax_delay_slots (rtx_insn *);
236 static void make_return_insns (rtx_insn *);
238 /* A wrapper around next_active_insn which takes care to return ret_rtx
239 unchanged. */
241 static rtx
242 first_active_target_insn (rtx insn)
244 if (ANY_RETURN_P (insn))
245 return insn;
246 return next_active_insn (as_a <rtx_insn *> (insn));
249 /* Return true iff INSN is a simplejump, or any kind of return insn. */
251 static bool
252 simplejump_or_return_p (rtx insn)
254 return (JUMP_P (insn)
255 && (simplejump_p (as_a <rtx_insn *> (insn))
256 || ANY_RETURN_P (PATTERN (insn))));
259 /* Return TRUE if this insn should stop the search for insn to fill delay
260 slots. LABELS_P indicates that labels should terminate the search.
261 In all cases, jumps terminate the search. */
263 static int
264 stop_search_p (rtx_insn *insn, int labels_p)
266 if (insn == 0)
267 return 1;
269 /* If the insn can throw an exception that is caught within the function,
270 it may effectively perform a jump from the viewpoint of the function.
271 Therefore act like for a jump. */
272 if (can_throw_internal (insn))
273 return 1;
275 switch (GET_CODE (insn))
277 case NOTE:
278 case CALL_INSN:
279 return 0;
281 case CODE_LABEL:
282 return labels_p;
284 case JUMP_INSN:
285 case BARRIER:
286 return 1;
288 case INSN:
289 /* OK unless it contains a delay slot or is an `asm' insn of some type.
290 We don't know anything about these. */
291 return (GET_CODE (PATTERN (insn)) == SEQUENCE
292 || GET_CODE (PATTERN (insn)) == ASM_INPUT
293 || asm_noperands (PATTERN (insn)) >= 0);
295 default:
296 gcc_unreachable ();
300 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
301 resource set contains a volatile memory reference. Otherwise, return FALSE. */
303 static int
304 resource_conflicts_p (struct resources *res1, struct resources *res2)
306 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
307 || res1->volatil || res2->volatil)
308 return 1;
310 return hard_reg_set_intersect_p (res1->regs, res2->regs);
313 /* Return TRUE if any resource marked in RES, a `struct resources', is
314 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, return if the called
315 routine is using those resources.
317 We compute this by computing all the resources referenced by INSN and
318 seeing if this conflicts with RES. It might be faster to directly check
319 ourselves, and this is the way it used to work, but it means duplicating
320 a large block of complex code. */
322 static int
323 insn_references_resource_p (rtx insn, struct resources *res,
324 bool include_delayed_effects)
326 struct resources insn_res;
328 CLEAR_RESOURCE (&insn_res);
329 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
330 return resource_conflicts_p (&insn_res, res);
333 /* Return TRUE if INSN modifies resources that are marked in RES.
334 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
335 included. CC0 is only modified if it is explicitly set; see comments
336 in front of mark_set_resources for details. */
338 static int
339 insn_sets_resource_p (rtx insn, struct resources *res,
340 bool include_delayed_effects)
342 struct resources insn_sets;
344 CLEAR_RESOURCE (&insn_sets);
345 mark_set_resources (insn, &insn_sets, 0,
346 (include_delayed_effects
347 ? MARK_SRC_DEST_CALL
348 : MARK_SRC_DEST));
349 return resource_conflicts_p (&insn_sets, res);
352 /* Find a label at the end of the function or before a RETURN. If there
353 is none, try to make one. If that fails, returns 0.
355 The property of such a label is that it is placed just before the
356 epilogue or a bare RETURN insn, so that another bare RETURN can be
357 turned into a jump to the label unconditionally. In particular, the
358 label cannot be placed before a RETURN insn with a filled delay slot.
360 ??? There may be a problem with the current implementation. Suppose
361 we start with a bare RETURN insn and call find_end_label. It may set
362 function_return_label just before the RETURN. Suppose the machinery
363 is able to fill the delay slot of the RETURN insn afterwards. Then
364 function_return_label is no longer valid according to the property
365 described above and find_end_label will still return it unmodified.
366 Note that this is probably mitigated by the following observation:
367 once function_return_label is made, it is very likely the target of
368 a jump, so filling the delay slot of the RETURN will be much more
369 difficult.
370 KIND is either simple_return_rtx or ret_rtx, indicating which type of
371 return we're looking for. */
373 static rtx_code_label *
374 find_end_label (rtx kind)
376 rtx_insn *insn;
377 rtx_code_label **plabel;
379 if (kind == ret_rtx)
380 plabel = &function_return_label;
381 else
383 gcc_assert (kind == simple_return_rtx);
384 plabel = &function_simple_return_label;
387 /* If we found one previously, return it. */
388 if (*plabel)
389 return *plabel;
391 /* Otherwise, see if there is a label at the end of the function. If there
392 is, it must be that RETURN insns aren't needed, so that is our return
393 label and we don't have to do anything else. */
395 insn = get_last_insn ();
396 while (NOTE_P (insn)
397 || (NONJUMP_INSN_P (insn)
398 && (GET_CODE (PATTERN (insn)) == USE
399 || GET_CODE (PATTERN (insn)) == CLOBBER)))
400 insn = PREV_INSN (insn);
402 /* When a target threads its epilogue we might already have a
403 suitable return insn. If so put a label before it for the
404 function_return_label. */
405 if (BARRIER_P (insn)
406 && JUMP_P (PREV_INSN (insn))
407 && PATTERN (PREV_INSN (insn)) == kind)
409 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
410 rtx_code_label *label = gen_label_rtx ();
411 LABEL_NUSES (label) = 0;
413 /* Put the label before any USE insns that may precede the RETURN
414 insn. */
415 while (GET_CODE (temp) == USE)
416 temp = PREV_INSN (temp);
418 emit_label_after (label, temp);
419 *plabel = label;
422 else if (LABEL_P (insn))
423 *plabel = as_a <rtx_code_label *> (insn);
424 else
426 rtx_code_label *label = gen_label_rtx ();
427 LABEL_NUSES (label) = 0;
428 /* If the basic block reorder pass moves the return insn to
429 some other place try to locate it again and put our
430 function_return_label there. */
431 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
432 insn = PREV_INSN (insn);
433 if (insn)
435 insn = PREV_INSN (insn);
437 /* Put the label before any USE insns that may precede the
438 RETURN insn. */
439 while (GET_CODE (insn) == USE)
440 insn = PREV_INSN (insn);
442 emit_label_after (label, insn);
444 else
446 if (targetm.have_epilogue () && ! targetm.have_return ())
447 /* The RETURN insn has its delay slot filled so we cannot
448 emit the label just before it. Since we already have
449 an epilogue and cannot emit a new RETURN, we cannot
450 emit the label at all. */
451 return NULL;
453 /* Otherwise, make a new label and emit a RETURN and BARRIER,
454 if needed. */
455 emit_label (label);
456 if (targetm.have_return ())
458 /* The return we make may have delay slots too. */
459 rtx_insn *pat = targetm.gen_return ();
460 rtx_insn *insn = emit_jump_insn (pat);
461 set_return_jump_label (insn);
462 emit_barrier ();
463 if (num_delay_slots (insn) > 0)
464 obstack_ptr_grow (&unfilled_slots_obstack, insn);
467 *plabel = label;
470 /* Show one additional use for this label so it won't go away until
471 we are done. */
472 ++LABEL_NUSES (*plabel);
474 return *plabel;
477 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
478 the pattern of INSN with the SEQUENCE.
480 Returns the insn containing the SEQUENCE that replaces INSN. */
482 static rtx_insn *
483 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
485 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
486 rtvec seqv = rtvec_alloc (length + 1);
487 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
488 rtx_insn *seq_insn = make_insn_raw (seq);
490 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
491 not have a location, but one of the delayed insns does, we pick up a
492 location from there later. */
493 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
495 /* Unlink INSN from the insn chain, so that we can put it into
496 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
497 rtx_insn *after = PREV_INSN (insn);
498 remove_insn (insn);
499 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
501 /* Build our SEQUENCE and rebuild the insn chain. */
502 start_sequence ();
503 XVECEXP (seq, 0, 0) = emit_insn (insn);
505 unsigned int delay_insns = list.length ();
506 gcc_assert (delay_insns == (unsigned int) length);
507 for (unsigned int i = 0; i < delay_insns; i++)
509 rtx_insn *tem = list[i];
510 rtx note, next;
512 /* Show that this copy of the insn isn't deleted. */
513 tem->set_undeleted ();
515 /* Unlink insn from its original place, and re-emit it into
516 the sequence. */
517 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
518 XVECEXP (seq, 0, i + 1) = emit_insn (tem);
520 /* SPARC assembler, for instance, emit warning when debug info is output
521 into the delay slot. */
522 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
523 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
524 INSN_LOCATION (tem) = 0;
526 for (note = REG_NOTES (tem); note; note = next)
528 next = XEXP (note, 1);
529 switch (REG_NOTE_KIND (note))
531 case REG_DEAD:
532 /* Remove any REG_DEAD notes because we can't rely on them now
533 that the insn has been moved. */
534 remove_note (tem, note);
535 break;
537 case REG_LABEL_OPERAND:
538 case REG_LABEL_TARGET:
539 /* Keep the label reference count up to date. */
540 if (LABEL_P (XEXP (note, 0)))
541 LABEL_NUSES (XEXP (note, 0)) ++;
542 break;
544 default:
545 break;
549 end_sequence ();
551 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
552 add_insn_after (seq_insn, after, NULL);
554 return seq_insn;
557 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
558 be in the order in which the insns are to be executed. */
560 static void
561 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
563 /* If INSN has its block number recorded, clear it since we may
564 be moving the insn to a new block. */
565 clear_hashed_info_for_insn (insn);
566 delay_list->safe_push (insn);
569 /* Delete INSN from the delay slot of the insn that it is in, which may
570 produce an insn with no delay slots. Return the new insn. */
572 static rtx_insn *
573 delete_from_delay_slot (rtx_insn *insn)
575 rtx_insn *trial, *seq_insn, *prev;
576 rtx_sequence *seq;
577 int i;
578 int had_barrier = 0;
580 /* We first must find the insn containing the SEQUENCE with INSN in its
581 delay slot. Do this by finding an insn, TRIAL, where
582 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
584 for (trial = insn;
585 PREV_INSN (NEXT_INSN (trial)) == trial;
586 trial = NEXT_INSN (trial))
589 seq_insn = PREV_INSN (NEXT_INSN (trial));
590 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
592 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
593 had_barrier = 1;
595 /* Create a delay list consisting of all the insns other than the one
596 we are deleting (unless we were the only one). */
597 auto_vec<rtx_insn *, 5> delay_list;
598 if (seq->len () > 2)
599 for (i = 1; i < seq->len (); i++)
600 if (seq->insn (i) != insn)
601 add_to_delay_list (seq->insn (i), &delay_list);
603 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
604 list, and rebuild the delay list if non-empty. */
605 prev = PREV_INSN (seq_insn);
606 trial = seq->insn (0);
607 delete_related_insns (seq_insn);
608 add_insn_after (trial, prev, NULL);
610 /* If there was a barrier after the old SEQUENCE, remit it. */
611 if (had_barrier)
612 emit_barrier_after (trial);
614 /* If there are any delay insns, remit them. Otherwise clear the
615 annul flag. */
616 if (!delay_list.is_empty ())
617 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
618 else if (JUMP_P (trial))
619 INSN_ANNULLED_BRANCH_P (trial) = 0;
621 INSN_FROM_TARGET_P (insn) = 0;
623 /* Show we need to fill this insn again. */
624 obstack_ptr_grow (&unfilled_slots_obstack, trial);
626 return trial;
629 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
630 the insn that sets CC0 for it and delete it too. */
632 static void
633 delete_scheduled_jump (rtx_insn *insn)
635 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
636 delete the insn that sets the condition code, but it is hard to find it.
637 Since this case is rare anyway, don't bother trying; there would likely
638 be other insns that became dead anyway, which we wouldn't know to
639 delete. */
641 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, insn))
643 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
645 /* If a reg-note was found, it points to an insn to set CC0. This
646 insn is in the delay list of some other insn. So delete it from
647 the delay list it was in. */
648 if (note)
650 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
651 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
652 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
654 else
656 /* The insn setting CC0 is our previous insn, but it may be in
657 a delay slot. It will be the last insn in the delay slot, if
658 it is. */
659 rtx_insn *trial = previous_insn (insn);
660 if (NOTE_P (trial))
661 trial = prev_nonnote_insn (trial);
662 if (sets_cc0_p (PATTERN (trial)) != 1
663 || FIND_REG_INC_NOTE (trial, NULL_RTX))
664 return;
665 if (PREV_INSN (NEXT_INSN (trial)) == trial)
666 delete_related_insns (trial);
667 else
668 delete_from_delay_slot (trial);
672 delete_related_insns (insn);
675 /* Counters for delay-slot filling. */
677 #define NUM_REORG_FUNCTIONS 2
678 #define MAX_DELAY_HISTOGRAM 3
679 #define MAX_REORG_PASSES 2
681 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
683 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
685 static int reorg_pass_number;
687 static void
688 note_delay_statistics (int slots_filled, int index)
690 num_insns_needing_delays[index][reorg_pass_number]++;
691 if (slots_filled > MAX_DELAY_HISTOGRAM)
692 slots_filled = MAX_DELAY_HISTOGRAM;
693 num_filled_delays[index][slots_filled][reorg_pass_number]++;
696 /* Optimize the following cases:
698 1. When a conditional branch skips over only one instruction,
699 use an annulling branch and put that insn in the delay slot.
700 Use either a branch that annuls when the condition if true or
701 invert the test with a branch that annuls when the condition is
702 false. This saves insns, since otherwise we must copy an insn
703 from the L1 target.
705 (orig) (skip) (otherwise)
706 Bcc.n L1 Bcc',a L1 Bcc,a L1'
707 insn insn insn2
708 L1: L1: L1:
709 insn2 insn2 insn2
710 insn3 insn3 L1':
711 insn3
713 2. When a conditional branch skips over only one instruction,
714 and after that, it unconditionally branches somewhere else,
715 perform the similar optimization. This saves executing the
716 second branch in the case where the inverted condition is true.
718 Bcc.n L1 Bcc',a L2
719 insn insn
720 L1: L1:
721 Bra L2 Bra L2
723 INSN is a JUMP_INSN.
725 This should be expanded to skip over N insns, where N is the number
726 of delay slots required. */
728 static void
729 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
731 rtx_insn *trial = next_nonnote_insn (insn);
732 rtx_insn *next_trial = next_active_insn (trial);
733 int flags;
735 flags = get_jump_flags (insn, JUMP_LABEL (insn));
737 if (trial == 0
738 || !NONJUMP_INSN_P (trial)
739 || GET_CODE (PATTERN (trial)) == SEQUENCE
740 || recog_memoized (trial) < 0
741 || (! eligible_for_annul_false (insn, 0, trial, flags)
742 && ! eligible_for_annul_true (insn, 0, trial, flags))
743 || RTX_FRAME_RELATED_P (trial)
744 || can_throw_internal (trial))
745 return;
747 /* There are two cases where we are just executing one insn (we assume
748 here that a branch requires only one insn; this should be generalized
749 at some point): Where the branch goes around a single insn or where
750 we have one insn followed by a branch to the same label we branch to.
751 In both of these cases, inverting the jump and annulling the delay
752 slot give the same effect in fewer insns. */
753 if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
754 || (next_trial != 0
755 && simplejump_or_return_p (next_trial)
756 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
758 if (eligible_for_annul_false (insn, 0, trial, flags))
760 if (invert_jump (insn, JUMP_LABEL (insn), 1))
761 INSN_FROM_TARGET_P (trial) = 1;
762 else if (! eligible_for_annul_true (insn, 0, trial, flags))
763 return;
766 add_to_delay_list (trial, delay_list);
767 next_trial = next_active_insn (trial);
768 update_block (trial, trial);
769 delete_related_insns (trial);
771 /* Also, if we are targeting an unconditional
772 branch, thread our jump to the target of that branch. Don't
773 change this into a RETURN here, because it may not accept what
774 we have in the delay slot. We'll fix this up later. */
775 if (next_trial && simplejump_or_return_p (next_trial))
777 rtx target_label = JUMP_LABEL (next_trial);
778 if (ANY_RETURN_P (target_label))
779 target_label = find_end_label (target_label);
781 if (target_label)
783 /* Recompute the flags based on TARGET_LABEL since threading
784 the jump to TARGET_LABEL may change the direction of the
785 jump (which may change the circumstances in which the
786 delay slot is nullified). */
787 flags = get_jump_flags (insn, target_label);
788 if (eligible_for_annul_true (insn, 0, trial, flags))
789 reorg_redirect_jump (insn, target_label);
793 INSN_ANNULLED_BRANCH_P (insn) = 1;
797 /* Encode and return branch direction and prediction information for
798 INSN assuming it will jump to LABEL.
800 Non conditional branches return no direction information and
801 are predicted as very likely taken. */
803 static int
804 get_jump_flags (const rtx_insn *insn, rtx label)
806 int flags;
808 /* get_jump_flags can be passed any insn with delay slots, these may
809 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
810 direction information, and only if they are conditional jumps.
812 If LABEL is a return, then there is no way to determine the branch
813 direction. */
814 if (JUMP_P (insn)
815 && (condjump_p (insn) || condjump_in_parallel_p (insn))
816 && !ANY_RETURN_P (label)
817 && INSN_UID (insn) <= max_uid
818 && INSN_UID (label) <= max_uid)
819 flags
820 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
821 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
822 /* No valid direction information. */
823 else
824 flags = 0;
826 return flags;
829 /* Return truth value of the statement that this branch
830 is mostly taken. If we think that the branch is extremely likely
831 to be taken, we return 2. If the branch is slightly more likely to be
832 taken, return 1. If the branch is slightly less likely to be taken,
833 return 0 and if the branch is highly unlikely to be taken, return -1. */
835 static int
836 mostly_true_jump (rtx jump_insn)
838 /* If branch probabilities are available, then use that number since it
839 always gives a correct answer. */
840 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
841 if (note)
843 int prob = XINT (note, 0);
845 if (prob >= REG_BR_PROB_BASE * 9 / 10)
846 return 2;
847 else if (prob >= REG_BR_PROB_BASE / 2)
848 return 1;
849 else if (prob >= REG_BR_PROB_BASE / 10)
850 return 0;
851 else
852 return -1;
855 /* If there is no note, assume branches are not taken.
856 This should be rare. */
857 return 0;
860 /* Return the condition under which INSN will branch to TARGET. If TARGET
861 is zero, return the condition under which INSN will return. If INSN is
862 an unconditional branch, return const_true_rtx. If INSN isn't a simple
863 type of jump, or it doesn't go to TARGET, return 0. */
865 static rtx
866 get_branch_condition (const rtx_insn *insn, rtx target)
868 rtx pat = PATTERN (insn);
869 rtx src;
871 if (condjump_in_parallel_p (insn))
872 pat = XVECEXP (pat, 0, 0);
874 if (ANY_RETURN_P (pat) && pat == target)
875 return const_true_rtx;
877 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
878 return 0;
880 src = SET_SRC (pat);
881 if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
882 return const_true_rtx;
884 else if (GET_CODE (src) == IF_THEN_ELSE
885 && XEXP (src, 2) == pc_rtx
886 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
887 && label_ref_label (XEXP (src, 1)) == target)
888 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
889 return XEXP (src, 0);
891 else if (GET_CODE (src) == IF_THEN_ELSE
892 && XEXP (src, 1) == pc_rtx
893 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
894 && label_ref_label (XEXP (src, 2)) == target)
895 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
897 enum rtx_code rev;
898 rev = reversed_comparison_code (XEXP (src, 0), insn);
899 if (rev != UNKNOWN)
900 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
901 XEXP (XEXP (src, 0), 0),
902 XEXP (XEXP (src, 0), 1));
905 return 0;
908 /* Return nonzero if CONDITION is more strict than the condition of
909 INSN, i.e., if INSN will always branch if CONDITION is true. */
911 static int
912 condition_dominates_p (rtx condition, const rtx_insn *insn)
914 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
915 enum rtx_code code = GET_CODE (condition);
916 enum rtx_code other_code;
918 if (rtx_equal_p (condition, other_condition)
919 || other_condition == const_true_rtx)
920 return 1;
922 else if (condition == const_true_rtx || other_condition == 0)
923 return 0;
925 other_code = GET_CODE (other_condition);
926 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
927 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
928 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
929 return 0;
931 return comparison_dominates_p (code, other_code);
934 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
935 any insns already in the delay slot of JUMP. */
937 static int
938 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
940 int flags, i;
941 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
943 /* Make sure all the delay slots of this jump would still
944 be valid after threading the jump. If they are still
945 valid, then return nonzero. */
947 flags = get_jump_flags (jump, newlabel);
948 for (i = 1; i < pat->len (); i++)
949 if (! (
950 #if ANNUL_IFFALSE_SLOTS
951 (INSN_ANNULLED_BRANCH_P (jump)
952 && INSN_FROM_TARGET_P (pat->insn (i)))
953 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
954 #endif
955 #if ANNUL_IFTRUE_SLOTS
956 (INSN_ANNULLED_BRANCH_P (jump)
957 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
958 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
959 #endif
960 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
961 break;
963 return (i == pat->len ());
966 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
967 any insns we wish to place in the delay slot of JUMP. */
969 static int
970 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
971 const vec<rtx_insn *> &delay_list)
973 /* Make sure all the insns in DELAY_LIST would still be
974 valid after threading the jump. If they are still
975 valid, then return nonzero. */
977 int flags = get_jump_flags (jump, newlabel);
978 unsigned int delay_insns = delay_list.length ();
979 unsigned int i = 0;
980 for (; i < delay_insns; i++)
981 if (! (
982 #if ANNUL_IFFALSE_SLOTS
983 (INSN_ANNULLED_BRANCH_P (jump)
984 && INSN_FROM_TARGET_P (delay_list[i]))
985 ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
986 #endif
987 #if ANNUL_IFTRUE_SLOTS
988 (INSN_ANNULLED_BRANCH_P (jump)
989 && ! INSN_FROM_TARGET_P (delay_list[i]))
990 ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
991 #endif
992 eligible_for_delay (jump, i, delay_list[i], flags)))
993 break;
995 return i == delay_insns;
998 /* DELAY_LIST is a list of insns that have already been placed into delay
999 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1000 If not, return 0; otherwise return 1. */
1002 static int
1003 check_annul_list_true_false (int annul_true_p,
1004 const vec<rtx_insn *> &delay_list)
1006 rtx_insn *trial;
1007 unsigned int i;
1008 FOR_EACH_VEC_ELT (delay_list, i, trial)
1009 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1010 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1011 return 0;
1013 return 1;
1016 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1017 the condition tested by INSN is CONDITION and the resources shown in
1018 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1019 from SEQ's delay list, in addition to whatever insns it may execute
1020 (in DELAY_LIST). SETS and NEEDED are denote resources already set and
1021 needed while searching for delay slot insns. Return the concatenated
1022 delay list if possible, otherwise, return 0.
1024 SLOTS_TO_FILL is the total number of slots required by INSN, and
1025 PSLOTS_FILLED points to the number filled so far (also the number of
1026 insns in DELAY_LIST). It is updated with the number that have been
1027 filled from the SEQUENCE, if any.
1029 PANNUL_P points to a nonzero value if we already know that we need
1030 to annul INSN. If this routine determines that annulling is needed,
1031 it may set that value nonzero.
1033 PNEW_THREAD points to a location that is to receive the place at which
1034 execution should continue. */
1036 static void
1037 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1038 vec<rtx_insn *> *delay_list, resources *sets,
1039 struct resources *needed,
1040 struct resources *other_needed,
1041 int slots_to_fill, int *pslots_filled,
1042 int *pannul_p, rtx *pnew_thread)
1044 int slots_remaining = slots_to_fill - *pslots_filled;
1045 int total_slots_filled = *pslots_filled;
1046 auto_vec<rtx_insn *, 5> new_delay_list;
1047 int must_annul = *pannul_p;
1048 int used_annul = 0;
1049 int i;
1050 struct resources cc_set;
1051 bool *redundant;
1053 /* We can't do anything if there are more delay slots in SEQ than we
1054 can handle, or if we don't know that it will be a taken branch.
1055 We know that it will be a taken branch if it is either an unconditional
1056 branch or a conditional branch with a stricter branch condition.
1058 Also, exit if the branch has more than one set, since then it is computing
1059 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1060 ??? It may be possible to move other sets into INSN in addition to
1061 moving the instructions in the delay slots.
1063 We can not steal the delay list if one of the instructions in the
1064 current delay_list modifies the condition codes and the jump in the
1065 sequence is a conditional jump. We can not do this because we can
1066 not change the direction of the jump because the condition codes
1067 will effect the direction of the jump in the sequence. */
1069 CLEAR_RESOURCE (&cc_set);
1071 rtx_insn *trial;
1072 FOR_EACH_VEC_ELT (*delay_list, i, trial)
1074 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1075 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1076 return;
1079 if (XVECLEN (seq, 0) - 1 > slots_remaining
1080 || ! condition_dominates_p (condition, seq->insn (0))
1081 || ! single_set (seq->insn (0)))
1082 return;
1084 /* On some targets, branches with delay slots can have a limited
1085 displacement. Give the back end a chance to tell us we can't do
1086 this. */
1087 if (! targetm.can_follow_jump (insn, seq->insn (0)))
1088 return;
1090 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1091 for (i = 1; i < seq->len (); i++)
1093 rtx_insn *trial = seq->insn (i);
1094 int flags;
1096 if (insn_references_resource_p (trial, sets, false)
1097 || insn_sets_resource_p (trial, needed, false)
1098 || insn_sets_resource_p (trial, sets, false)
1099 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1100 delay list. */
1101 || (HAVE_cc0 && find_reg_note (trial, REG_CC_USER, NULL_RTX))
1102 /* If TRIAL is from the fallthrough code of an annulled branch insn
1103 in SEQ, we cannot use it. */
1104 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1105 && ! INSN_FROM_TARGET_P (trial)))
1106 return;
1108 /* If this insn was already done (usually in a previous delay slot),
1109 pretend we put it in our delay slot. */
1110 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1111 if (redundant[i])
1112 continue;
1114 /* We will end up re-vectoring this branch, so compute flags
1115 based on jumping to the new label. */
1116 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1118 if (! must_annul
1119 && ((condition == const_true_rtx
1120 || (! insn_sets_resource_p (trial, other_needed, false)
1121 && ! may_trap_or_fault_p (PATTERN (trial)))))
1122 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1123 : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1124 && (must_annul = 1,
1125 check_annul_list_true_false (0, *delay_list)
1126 && check_annul_list_true_false (0, new_delay_list)
1127 && eligible_for_annul_false (insn, total_slots_filled,
1128 trial, flags)))
1130 if (must_annul)
1132 /* Frame related instructions cannot go into annulled delay
1133 slots, it messes up the dwarf info. */
1134 if (RTX_FRAME_RELATED_P (trial))
1135 return;
1136 used_annul = 1;
1138 rtx_insn *temp = copy_delay_slot_insn (trial);
1139 INSN_FROM_TARGET_P (temp) = 1;
1140 add_to_delay_list (temp, &new_delay_list);
1141 total_slots_filled++;
1143 if (--slots_remaining == 0)
1144 break;
1146 else
1147 return;
1150 /* Record the effect of the instructions that were redundant and which
1151 we therefore decided not to copy. */
1152 for (i = 1; i < seq->len (); i++)
1153 if (redundant[i])
1154 update_block (seq->insn (i), insn);
1156 /* Show the place to which we will be branching. */
1157 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1159 /* Add any new insns to the delay list and update the count of the
1160 number of slots filled. */
1161 *pslots_filled = total_slots_filled;
1162 if (used_annul)
1163 *pannul_p = 1;
1165 rtx_insn *temp;
1166 FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1167 add_to_delay_list (temp, delay_list);
1170 /* Similar to steal_delay_list_from_target except that SEQ is on the
1171 fallthrough path of INSN. Here we only do something if the delay insn
1172 of SEQ is an unconditional branch. In that case we steal its delay slot
1173 for INSN since unconditional branches are much easier to fill. */
1175 static void
1176 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1177 rtx_sequence *seq,
1178 vec<rtx_insn *> *delay_list,
1179 struct resources *sets,
1180 struct resources *needed,
1181 struct resources *other_needed,
1182 int slots_to_fill, int *pslots_filled,
1183 int *pannul_p)
1185 int i;
1186 int flags;
1187 int must_annul = *pannul_p;
1188 int used_annul = 0;
1190 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1192 /* We can't do anything if SEQ's delay insn isn't an
1193 unconditional branch. */
1195 if (! simplejump_or_return_p (seq->insn (0)))
1196 return;
1198 for (i = 1; i < seq->len (); i++)
1200 rtx_insn *trial = seq->insn (i);
1202 /* If TRIAL sets CC0, stealing it will move it too far from the use
1203 of CC0. */
1204 if (insn_references_resource_p (trial, sets, false)
1205 || insn_sets_resource_p (trial, needed, false)
1206 || insn_sets_resource_p (trial, sets, false)
1207 || (HAVE_cc0 && sets_cc0_p (PATTERN (trial))))
1209 break;
1211 /* If this insn was already done, we don't need it. */
1212 if (redundant_insn (trial, insn, *delay_list))
1214 update_block (trial, insn);
1215 delete_from_delay_slot (trial);
1216 continue;
1219 if (! must_annul
1220 && ((condition == const_true_rtx
1221 || (! insn_sets_resource_p (trial, other_needed, false)
1222 && ! may_trap_or_fault_p (PATTERN (trial)))))
1223 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1224 : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1225 check_annul_list_true_false (1, *delay_list)
1226 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1228 if (must_annul)
1229 used_annul = 1;
1230 delete_from_delay_slot (trial);
1231 add_to_delay_list (trial, delay_list);
1233 if (++(*pslots_filled) == slots_to_fill)
1234 break;
1236 else
1237 break;
1240 if (used_annul)
1241 *pannul_p = 1;
1244 /* Try merging insns starting at THREAD which match exactly the insns in
1245 INSN's delay list.
1247 If all insns were matched and the insn was previously annulling, the
1248 annul bit will be cleared.
1250 For each insn that is merged, if the branch is or will be non-annulling,
1251 we delete the merged insn. */
1253 static void
1254 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1256 rtx_insn *trial, *next_trial;
1257 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1258 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1259 int slot_number = 1;
1260 int num_slots = XVECLEN (PATTERN (insn), 0);
1261 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1262 struct resources set, needed, modified;
1263 auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1264 int flags;
1266 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1268 CLEAR_RESOURCE (&needed);
1269 CLEAR_RESOURCE (&set);
1271 /* If this is not an annulling branch, take into account anything needed in
1272 INSN's delay slot. This prevents two increments from being incorrectly
1273 folded into one. If we are annulling, this would be the correct
1274 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1275 will essentially disable this optimization. This method is somewhat of
1276 a kludge, but I don't see a better way.) */
1277 if (! annul_p)
1278 for (int i = 1; i < num_slots; i++)
1279 if (XVECEXP (PATTERN (insn), 0, i))
1280 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1281 true);
1283 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1285 rtx pat = PATTERN (trial);
1286 rtx oldtrial = trial;
1288 next_trial = next_nonnote_insn (trial);
1290 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1291 if (NONJUMP_INSN_P (trial)
1292 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1293 continue;
1295 if (GET_CODE (next_to_match) == GET_CODE (trial)
1296 /* We can't share an insn that sets cc0. */
1297 && (!HAVE_cc0 || ! sets_cc0_p (pat))
1298 && ! insn_references_resource_p (trial, &set, true)
1299 && ! insn_sets_resource_p (trial, &set, true)
1300 && ! insn_sets_resource_p (trial, &needed, true)
1301 && (trial = try_split (pat, trial, 0)) != 0
1302 /* Update next_trial, in case try_split succeeded. */
1303 && (next_trial = next_nonnote_insn (trial))
1304 /* Likewise THREAD. */
1305 && (thread = oldtrial == thread ? trial : thread)
1306 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1307 /* Have to test this condition if annul condition is different
1308 from (and less restrictive than) non-annulling one. */
1309 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1312 if (! annul_p)
1314 update_block (trial, thread);
1315 if (trial == thread)
1316 thread = next_active_insn (thread);
1318 delete_related_insns (trial);
1319 INSN_FROM_TARGET_P (next_to_match) = 0;
1321 else
1322 merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1324 if (++slot_number == num_slots)
1325 break;
1327 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1330 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1331 mark_referenced_resources (trial, &needed, true);
1334 /* See if we stopped on a filled insn. If we did, try to see if its
1335 delay slots match. */
1336 if (slot_number != num_slots
1337 && trial && NONJUMP_INSN_P (trial)
1338 && GET_CODE (PATTERN (trial)) == SEQUENCE
1339 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1340 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1342 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1343 rtx filled_insn = XVECEXP (pat, 0, 0);
1345 /* Account for resources set/needed by the filled insn. */
1346 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1347 mark_referenced_resources (filled_insn, &needed, true);
1349 for (int i = 1; i < pat->len (); i++)
1351 rtx_insn *dtrial = pat->insn (i);
1353 CLEAR_RESOURCE (&modified);
1354 /* Account for resources set by the insn following NEXT_TO_MATCH
1355 inside INSN's delay list. */
1356 for (int j = 1; slot_number + j < num_slots; j++)
1357 mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1358 &modified, 0, MARK_SRC_DEST_CALL);
1359 /* Account for resources set by the insn before DTRIAL and inside
1360 TRIAL's delay list. */
1361 for (int j = 1; j < i; j++)
1362 mark_set_resources (XVECEXP (pat, 0, j),
1363 &modified, 0, MARK_SRC_DEST_CALL);
1364 if (! insn_references_resource_p (dtrial, &set, true)
1365 && ! insn_sets_resource_p (dtrial, &set, true)
1366 && ! insn_sets_resource_p (dtrial, &needed, true)
1367 && (!HAVE_cc0 || ! sets_cc0_p (PATTERN (dtrial)))
1368 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1369 /* Check that DTRIAL and NEXT_TO_MATCH does not reference a
1370 resource modified between them (only dtrial is checked because
1371 next_to_match and dtrial shall to be equal in order to hit
1372 this line) */
1373 && ! insn_references_resource_p (dtrial, &modified, true)
1374 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1376 if (! annul_p)
1378 rtx_insn *new_rtx;
1380 update_block (dtrial, thread);
1381 new_rtx = delete_from_delay_slot (dtrial);
1382 if (thread->deleted ())
1383 thread = new_rtx;
1384 INSN_FROM_TARGET_P (next_to_match) = 0;
1386 else
1387 merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1388 true));
1390 if (++slot_number == num_slots)
1391 break;
1393 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1395 else
1397 /* Keep track of the set/referenced resources for the delay
1398 slots of any trial insns we encounter. */
1399 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1400 mark_referenced_resources (dtrial, &needed, true);
1405 /* If all insns in the delay slot have been matched and we were previously
1406 annulling the branch, we need not any more. In that case delete all the
1407 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1408 the delay list so that we know that it isn't only being used at the
1409 target. */
1410 if (slot_number == num_slots && annul_p)
1412 unsigned int len = merged_insns.length ();
1413 for (unsigned int i = len - 1; i < len; i--)
1414 if (merged_insns[i].second)
1416 update_block (merged_insns[i].first, thread);
1417 rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1418 if (thread->deleted ())
1419 thread = new_rtx;
1421 else
1423 update_block (merged_insns[i].first, thread);
1424 delete_related_insns (merged_insns[i].first);
1427 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1429 for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1430 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1434 /* See if INSN is redundant with an insn in front of TARGET. Often this
1435 is called when INSN is a candidate for a delay slot of TARGET.
1436 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1437 of INSN. Often INSN will be redundant with an insn in a delay slot of
1438 some previous insn. This happens when we have a series of branches to the
1439 same label; in that case the first insn at the target might want to go
1440 into each of the delay slots.
1442 If we are not careful, this routine can take up a significant fraction
1443 of the total compilation time (4%), but only wins rarely. Hence we
1444 speed this routine up by making two passes. The first pass goes back
1445 until it hits a label and sees if it finds an insn with an identical
1446 pattern. Only in this (relatively rare) event does it check for
1447 data conflicts.
1449 We do not split insns we encounter. This could cause us not to find a
1450 redundant insn, but the cost of splitting seems greater than the possible
1451 gain in rare cases. */
1453 static rtx_insn *
1454 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1456 rtx target_main = target;
1457 rtx ipat = PATTERN (insn);
1458 rtx_insn *trial;
1459 rtx pat;
1460 struct resources needed, set;
1461 int i;
1462 unsigned insns_to_search;
1464 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1465 are allowed to not actually assign to such a register. */
1466 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1467 return 0;
1469 /* Scan backwards looking for a match. */
1470 for (trial = PREV_INSN (target),
1471 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1472 trial && insns_to_search > 0;
1473 trial = PREV_INSN (trial))
1475 /* (use (insn))s can come immediately after a barrier if the
1476 label that used to precede them has been deleted as dead.
1477 See delete_related_insns. */
1478 if (LABEL_P (trial) || BARRIER_P (trial))
1479 return 0;
1481 if (!INSN_P (trial))
1482 continue;
1483 --insns_to_search;
1485 pat = PATTERN (trial);
1486 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1487 continue;
1489 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1491 /* Stop for a CALL and its delay slots because it is difficult to
1492 track its resource needs correctly. */
1493 if (CALL_P (seq->element (0)))
1494 return 0;
1496 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1497 slots because it is difficult to track its resource needs
1498 correctly. */
1500 if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1501 return 0;
1503 if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1504 return 0;
1506 /* See if any of the insns in the delay slot match, updating
1507 resource requirements as we go. */
1508 for (i = seq->len () - 1; i > 0; i--)
1509 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1510 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1511 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1512 break;
1514 /* If found a match, exit this loop early. */
1515 if (i > 0)
1516 break;
1519 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1520 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1521 break;
1524 /* If we didn't find an insn that matches, return 0. */
1525 if (trial == 0)
1526 return 0;
1528 /* See what resources this insn sets and needs. If they overlap, or
1529 if this insn references CC0, it can't be redundant. */
1531 CLEAR_RESOURCE (&needed);
1532 CLEAR_RESOURCE (&set);
1533 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1534 mark_referenced_resources (insn, &needed, true);
1536 /* If TARGET is a SEQUENCE, get the main insn. */
1537 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1538 target_main = XVECEXP (PATTERN (target), 0, 0);
1540 if (resource_conflicts_p (&needed, &set)
1541 || (HAVE_cc0 && reg_mentioned_p (cc0_rtx, ipat))
1542 /* The insn requiring the delay may not set anything needed or set by
1543 INSN. */
1544 || insn_sets_resource_p (target_main, &needed, true)
1545 || insn_sets_resource_p (target_main, &set, true))
1546 return 0;
1548 /* Insns we pass may not set either NEEDED or SET, so merge them for
1549 simpler tests. */
1550 needed.memory |= set.memory;
1551 IOR_HARD_REG_SET (needed.regs, set.regs);
1553 /* This insn isn't redundant if it conflicts with an insn that either is
1554 or will be in a delay slot of TARGET. */
1556 unsigned int j;
1557 rtx_insn *temp;
1558 FOR_EACH_VEC_ELT (delay_list, j, temp)
1559 if (insn_sets_resource_p (temp, &needed, true))
1560 return 0;
1562 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1563 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1564 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1565 true))
1566 return 0;
1568 /* Scan backwards until we reach a label or an insn that uses something
1569 INSN sets or sets something insn uses or sets. */
1571 for (trial = PREV_INSN (target),
1572 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1573 trial && !LABEL_P (trial) && insns_to_search > 0;
1574 trial = PREV_INSN (trial))
1576 if (!INSN_P (trial))
1577 continue;
1578 --insns_to_search;
1580 pat = PATTERN (trial);
1581 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1582 continue;
1584 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1586 bool annul_p = false;
1587 rtx_insn *control = seq->insn (0);
1589 /* If this is a CALL_INSN and its delay slots, it is hard to track
1590 the resource needs properly, so give up. */
1591 if (CALL_P (control))
1592 return 0;
1594 /* If this is an INSN or JUMP_INSN with delayed effects, it
1595 is hard to track the resource needs properly, so give up. */
1597 if (INSN_SETS_ARE_DELAYED (control))
1598 return 0;
1600 if (INSN_REFERENCES_ARE_DELAYED (control))
1601 return 0;
1603 if (JUMP_P (control))
1604 annul_p = INSN_ANNULLED_BRANCH_P (control);
1606 /* See if any of the insns in the delay slot match, updating
1607 resource requirements as we go. */
1608 for (i = seq->len () - 1; i > 0; i--)
1610 rtx_insn *candidate = seq->insn (i);
1612 /* If an insn will be annulled if the branch is false, it isn't
1613 considered as a possible duplicate insn. */
1614 if (rtx_equal_p (PATTERN (candidate), ipat)
1615 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1617 /* Show that this insn will be used in the sequel. */
1618 INSN_FROM_TARGET_P (candidate) = 0;
1619 return candidate;
1622 /* Unless this is an annulled insn from the target of a branch,
1623 we must stop if it sets anything needed or set by INSN. */
1624 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1625 && insn_sets_resource_p (candidate, &needed, true))
1626 return 0;
1629 /* If the insn requiring the delay slot conflicts with INSN, we
1630 must stop. */
1631 if (insn_sets_resource_p (control, &needed, true))
1632 return 0;
1634 else
1636 /* See if TRIAL is the same as INSN. */
1637 pat = PATTERN (trial);
1638 if (rtx_equal_p (pat, ipat))
1639 return trial;
1641 /* Can't go any further if TRIAL conflicts with INSN. */
1642 if (insn_sets_resource_p (trial, &needed, true))
1643 return 0;
1647 return 0;
1650 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1651 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1652 is nonzero, we are allowed to fall into this thread; otherwise, we are
1653 not.
1655 If LABEL is used more than one or we pass a label other than LABEL before
1656 finding an active insn, we do not own this thread. */
1658 static int
1659 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1661 rtx_insn *active_insn;
1662 rtx_insn *insn;
1664 /* We don't own the function end. */
1665 if (thread == 0 || ANY_RETURN_P (thread))
1666 return 0;
1668 /* We have a non-NULL insn. */
1669 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1671 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1672 active_insn = next_active_insn (PREV_INSN (thread_insn));
1674 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1675 if (LABEL_P (insn)
1676 && (insn != label || LABEL_NUSES (insn) != 1))
1677 return 0;
1679 if (allow_fallthrough)
1680 return 1;
1682 /* Ensure that we reach a BARRIER before any insn or label. */
1683 for (insn = prev_nonnote_insn (thread_insn);
1684 insn == 0 || !BARRIER_P (insn);
1685 insn = prev_nonnote_insn (insn))
1686 if (insn == 0
1687 || LABEL_P (insn)
1688 || (NONJUMP_INSN_P (insn)
1689 && GET_CODE (PATTERN (insn)) != USE
1690 && GET_CODE (PATTERN (insn)) != CLOBBER))
1691 return 0;
1693 return 1;
1696 /* Called when INSN is being moved from a location near the target of a jump.
1697 We leave a marker of the form (use (INSN)) immediately in front
1698 of WHERE for mark_target_live_regs. These markers will be deleted when
1699 reorg finishes.
1701 We used to try to update the live status of registers if WHERE is at
1702 the start of a basic block, but that can't work since we may remove a
1703 BARRIER in relax_delay_slots. */
1705 static void
1706 update_block (rtx_insn *insn, rtx_insn *where)
1708 /* Ignore if this was in a delay slot and it came from the target of
1709 a branch. */
1710 if (INSN_FROM_TARGET_P (insn))
1711 return;
1713 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1715 /* INSN might be making a value live in a block where it didn't use to
1716 be. So recompute liveness information for this block. */
1718 incr_ticks_for_insn (insn);
1721 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1722 the basic block containing the jump. */
1724 static int
1725 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1727 incr_ticks_for_insn (jump);
1728 return redirect_jump (jump, nlabel, 1);
1731 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1732 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1733 that reference values used in INSN. If we find one, then we move the
1734 REG_DEAD note to INSN.
1736 This is needed to handle the case where a later insn (after INSN) has a
1737 REG_DEAD note for a register used by INSN, and this later insn subsequently
1738 gets moved before a CODE_LABEL because it is a redundant insn. In this
1739 case, mark_target_live_regs may be confused into thinking the register
1740 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1742 static void
1743 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1745 rtx link, next;
1746 rtx_insn *p;
1748 for (p = next_nonnote_insn (insn); p != delayed_insn;
1749 p = next_nonnote_insn (p))
1750 for (link = REG_NOTES (p); link; link = next)
1752 next = XEXP (link, 1);
1754 if (REG_NOTE_KIND (link) != REG_DEAD
1755 || !REG_P (XEXP (link, 0)))
1756 continue;
1758 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1760 /* Move the REG_DEAD note from P to INSN. */
1761 remove_note (p, link);
1762 XEXP (link, 1) = REG_NOTES (insn);
1763 REG_NOTES (insn) = link;
1768 /* Called when an insn redundant with start_insn is deleted. If there
1769 is a REG_DEAD note for the target of start_insn between start_insn
1770 and stop_insn, then the REG_DEAD note needs to be deleted since the
1771 value no longer dies there.
1773 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1774 confused into thinking the register is dead. */
1776 static void
1777 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1779 rtx link, next;
1780 rtx_insn *p;
1782 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1783 p = next_nonnote_insn (p))
1784 for (link = REG_NOTES (p); link; link = next)
1786 next = XEXP (link, 1);
1788 if (REG_NOTE_KIND (link) != REG_DEAD
1789 || !REG_P (XEXP (link, 0)))
1790 continue;
1792 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1794 remove_note (p, link);
1795 return;
1800 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1802 This handles the case of udivmodXi4 instructions which optimize their
1803 output depending on whether any REG_UNUSED notes are present.
1804 we must make sure that INSN calculates as many results as REDUNDANT_INSN
1805 does. */
1807 static void
1808 update_reg_unused_notes (rtx_insn *insn, rtx redundant_insn)
1810 rtx link, next;
1812 for (link = REG_NOTES (insn); link; link = next)
1814 next = XEXP (link, 1);
1816 if (REG_NOTE_KIND (link) != REG_UNUSED
1817 || !REG_P (XEXP (link, 0)))
1818 continue;
1820 if (! find_regno_note (redundant_insn, REG_UNUSED,
1821 REGNO (XEXP (link, 0))))
1822 remove_note (insn, link);
1826 static vec <rtx> sibling_labels;
1828 /* Return the label before INSN, or put a new label there. If SIBLING is
1829 non-zero, it is another label associated with the new label (if any),
1830 typically the former target of the jump that will be redirected to
1831 the new label. */
1833 static rtx_insn *
1834 get_label_before (rtx_insn *insn, rtx sibling)
1836 rtx_insn *label;
1838 /* Find an existing label at this point
1839 or make a new one if there is none. */
1840 label = prev_nonnote_insn (insn);
1842 if (label == 0 || !LABEL_P (label))
1844 rtx_insn *prev = PREV_INSN (insn);
1846 label = gen_label_rtx ();
1847 emit_label_after (label, prev);
1848 LABEL_NUSES (label) = 0;
1849 if (sibling)
1851 sibling_labels.safe_push (label);
1852 sibling_labels.safe_push (sibling);
1855 return label;
1858 /* Scan a function looking for insns that need a delay slot and find insns to
1859 put into the delay slot.
1861 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1862 as calls). We do these first since we don't want jump insns (that are
1863 easier to fill) to get the only insns that could be used for non-jump insns.
1864 When it is zero, only try to fill JUMP_INSNs.
1866 When slots are filled in this manner, the insns (including the
1867 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1868 it is possible to tell whether a delay slot has really been filled
1869 or not. `final' knows how to deal with this, by communicating
1870 through FINAL_SEQUENCE. */
1872 static void
1873 fill_simple_delay_slots (int non_jumps_p)
1875 rtx_insn *insn, *trial, *next_trial;
1876 rtx pat;
1877 int i;
1878 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1879 struct resources needed, set;
1880 int slots_to_fill, slots_filled;
1881 auto_vec<rtx_insn *, 5> delay_list;
1883 for (i = 0; i < num_unfilled_slots; i++)
1885 int flags;
1886 /* Get the next insn to fill. If it has already had any slots assigned,
1887 we can't do anything with it. Maybe we'll improve this later. */
1889 insn = unfilled_slots_base[i];
1890 if (insn == 0
1891 || insn->deleted ()
1892 || (NONJUMP_INSN_P (insn)
1893 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1894 || (JUMP_P (insn) && non_jumps_p)
1895 || (!JUMP_P (insn) && ! non_jumps_p))
1896 continue;
1898 /* It may have been that this insn used to need delay slots, but
1899 now doesn't; ignore in that case. This can happen, for example,
1900 on the HP PA RISC, where the number of delay slots depends on
1901 what insns are nearby. */
1902 slots_to_fill = num_delay_slots (insn);
1904 /* Some machine description have defined instructions to have
1905 delay slots only in certain circumstances which may depend on
1906 nearby insns (which change due to reorg's actions).
1908 For example, the PA port normally has delay slots for unconditional
1909 jumps.
1911 However, the PA port claims such jumps do not have a delay slot
1912 if they are immediate successors of certain CALL_INSNs. This
1913 allows the port to favor filling the delay slot of the call with
1914 the unconditional jump. */
1915 if (slots_to_fill == 0)
1916 continue;
1918 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1919 says how many. After initialization, first try optimizing
1921 call _foo call _foo
1922 nop add %o7,.-L1,%o7
1923 b,a L1
1926 If this case applies, the delay slot of the call is filled with
1927 the unconditional jump. This is done first to avoid having the
1928 delay slot of the call filled in the backward scan. Also, since
1929 the unconditional jump is likely to also have a delay slot, that
1930 insn must exist when it is subsequently scanned.
1932 This is tried on each insn with delay slots as some machines
1933 have insns which perform calls, but are not represented as
1934 CALL_INSNs. */
1936 slots_filled = 0;
1937 delay_list.truncate (0);
1939 if (JUMP_P (insn))
1940 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1941 else
1942 flags = get_jump_flags (insn, NULL_RTX);
1944 if ((trial = next_active_insn (insn))
1945 && JUMP_P (trial)
1946 && simplejump_p (trial)
1947 && eligible_for_delay (insn, slots_filled, trial, flags)
1948 && no_labels_between_p (insn, trial)
1949 && ! can_throw_internal (trial))
1951 rtx_insn **tmp;
1952 slots_filled++;
1953 add_to_delay_list (trial, &delay_list);
1955 /* TRIAL may have had its delay slot filled, then unfilled. When
1956 the delay slot is unfilled, TRIAL is placed back on the unfilled
1957 slots obstack. Unfortunately, it is placed on the end of the
1958 obstack, not in its original location. Therefore, we must search
1959 from entry i + 1 to the end of the unfilled slots obstack to
1960 try and find TRIAL. */
1961 tmp = &unfilled_slots_base[i + 1];
1962 while (*tmp != trial && tmp != unfilled_slots_next)
1963 tmp++;
1965 /* Remove the unconditional jump from consideration for delay slot
1966 filling and unthread it. */
1967 if (*tmp == trial)
1968 *tmp = 0;
1970 rtx_insn *next = NEXT_INSN (trial);
1971 rtx_insn *prev = PREV_INSN (trial);
1972 if (prev)
1973 SET_NEXT_INSN (prev) = next;
1974 if (next)
1975 SET_PREV_INSN (next) = prev;
1979 /* Now, scan backwards from the insn to search for a potential
1980 delay-slot candidate. Stop searching when a label or jump is hit.
1982 For each candidate, if it is to go into the delay slot (moved
1983 forward in execution sequence), it must not need or set any resources
1984 that were set by later insns and must not set any resources that
1985 are needed for those insns.
1987 The delay slot insn itself sets resources unless it is a call
1988 (in which case the called routine, not the insn itself, is doing
1989 the setting). */
1991 if (slots_filled < slots_to_fill)
1993 /* If the flags register is dead after the insn, then we want to be
1994 able to accept a candidate that clobbers it. For this purpose,
1995 we need to filter the flags register during life analysis, so
1996 that it doesn't create RAW and WAW dependencies, while still
1997 creating the necessary WAR dependencies. */
1998 bool filter_flags
1999 = (slots_to_fill == 1
2000 && targetm.flags_regnum != INVALID_REGNUM
2001 && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2002 struct resources fset;
2003 CLEAR_RESOURCE (&needed);
2004 CLEAR_RESOURCE (&set);
2005 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2006 if (filter_flags)
2008 CLEAR_RESOURCE (&fset);
2009 mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
2011 mark_referenced_resources (insn, &needed, false);
2013 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2014 trial = next_trial)
2016 next_trial = prev_nonnote_insn (trial);
2018 /* This must be an INSN or CALL_INSN. */
2019 pat = PATTERN (trial);
2021 /* Stand-alone USE and CLOBBER are just for flow. */
2022 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2023 continue;
2025 /* Check for resource conflict first, to avoid unnecessary
2026 splitting. */
2027 if (! insn_references_resource_p (trial, &set, true)
2028 && ! insn_sets_resource_p (trial,
2029 filter_flags ? &fset : &set,
2030 true)
2031 && ! insn_sets_resource_p (trial, &needed, true)
2032 /* Can't separate set of cc0 from its use. */
2033 && (!HAVE_cc0 || ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2034 && ! can_throw_internal (trial))
2036 trial = try_split (pat, trial, 1);
2037 next_trial = prev_nonnote_insn (trial);
2038 if (eligible_for_delay (insn, slots_filled, trial, flags))
2040 /* In this case, we are searching backward, so if we
2041 find insns to put on the delay list, we want
2042 to put them at the head, rather than the
2043 tail, of the list. */
2045 update_reg_dead_notes (trial, insn);
2046 delay_list.safe_insert (0, trial);
2047 update_block (trial, trial);
2048 delete_related_insns (trial);
2049 if (slots_to_fill == ++slots_filled)
2050 break;
2051 continue;
2055 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2056 if (filter_flags)
2058 mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2059 /* If the flags register is set, then it doesn't create RAW
2060 dependencies any longer and it also doesn't create WAW
2061 dependencies since it's dead after the original insn. */
2062 if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2064 CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2065 CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2068 mark_referenced_resources (trial, &needed, true);
2072 /* If all needed slots haven't been filled, we come here. */
2074 /* Try to optimize case of jumping around a single insn. */
2075 if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2076 && slots_filled != slots_to_fill
2077 && delay_list.is_empty ()
2078 && JUMP_P (insn)
2079 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2080 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2082 optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2083 if (!delay_list.is_empty ())
2084 slots_filled += 1;
2087 /* Try to get insns from beyond the insn needing the delay slot.
2088 These insns can neither set or reference resources set in insns being
2089 skipped, cannot set resources in the insn being skipped, and, if this
2090 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2091 call might not return).
2093 There used to be code which continued past the target label if
2094 we saw all uses of the target label. This code did not work,
2095 because it failed to account for some instructions which were
2096 both annulled and marked as from the target. This can happen as a
2097 result of optimize_skip. Since this code was redundant with
2098 fill_eager_delay_slots anyways, it was just deleted. */
2100 if (slots_filled != slots_to_fill
2101 /* If this instruction could throw an exception which is
2102 caught in the same function, then it's not safe to fill
2103 the delay slot with an instruction from beyond this
2104 point. For example, consider:
2106 int i = 2;
2108 try {
2109 f();
2110 i = 3;
2111 } catch (...) {}
2113 return i;
2115 Even though `i' is a local variable, we must be sure not
2116 to put `i = 3' in the delay slot if `f' might throw an
2117 exception.
2119 Presumably, we should also check to see if we could get
2120 back to this function via `setjmp'. */
2121 && ! can_throw_internal (insn)
2122 && !JUMP_P (insn))
2124 int maybe_never = 0;
2125 rtx pat, trial_delay;
2127 CLEAR_RESOURCE (&needed);
2128 CLEAR_RESOURCE (&set);
2129 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2130 mark_referenced_resources (insn, &needed, true);
2132 if (CALL_P (insn))
2133 maybe_never = 1;
2135 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2136 trial = next_trial)
2138 next_trial = next_nonnote_insn (trial);
2140 /* This must be an INSN or CALL_INSN. */
2141 pat = PATTERN (trial);
2143 /* Stand-alone USE and CLOBBER are just for flow. */
2144 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2145 continue;
2147 /* If this already has filled delay slots, get the insn needing
2148 the delay slots. */
2149 if (GET_CODE (pat) == SEQUENCE)
2150 trial_delay = XVECEXP (pat, 0, 0);
2151 else
2152 trial_delay = trial;
2154 /* Stop our search when seeing a jump. */
2155 if (JUMP_P (trial_delay))
2156 break;
2158 /* See if we have a resource problem before we try to split. */
2159 if (GET_CODE (pat) != SEQUENCE
2160 && ! insn_references_resource_p (trial, &set, true)
2161 && ! insn_sets_resource_p (trial, &set, true)
2162 && ! insn_sets_resource_p (trial, &needed, true)
2163 && (!HAVE_cc0 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat)))
2164 && ! (maybe_never && may_trap_or_fault_p (pat))
2165 && (trial = try_split (pat, trial, 0))
2166 && eligible_for_delay (insn, slots_filled, trial, flags)
2167 && ! can_throw_internal (trial))
2169 next_trial = next_nonnote_insn (trial);
2170 add_to_delay_list (trial, &delay_list);
2171 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2172 link_cc0_insns (trial);
2174 delete_related_insns (trial);
2175 if (slots_to_fill == ++slots_filled)
2176 break;
2177 continue;
2180 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2181 mark_referenced_resources (trial, &needed, true);
2183 /* Ensure we don't put insns between the setting of cc and the
2184 comparison by moving a setting of cc into an earlier delay
2185 slot since these insns could clobber the condition code. */
2186 set.cc = 1;
2188 /* If this is a call, we might not get here. */
2189 if (CALL_P (trial_delay))
2190 maybe_never = 1;
2193 /* If there are slots left to fill and our search was stopped by an
2194 unconditional branch, try the insn at the branch target. We can
2195 redirect the branch if it works.
2197 Don't do this if the insn at the branch target is a branch. */
2198 if (slots_to_fill != slots_filled
2199 && trial
2200 && jump_to_label_p (trial)
2201 && simplejump_p (trial)
2202 && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2203 && ! (NONJUMP_INSN_P (next_trial)
2204 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2205 && !JUMP_P (next_trial)
2206 && ! insn_references_resource_p (next_trial, &set, true)
2207 && ! insn_sets_resource_p (next_trial, &set, true)
2208 && ! insn_sets_resource_p (next_trial, &needed, true)
2209 && (!HAVE_cc0 || ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial)))
2210 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2211 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2212 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2213 && ! can_throw_internal (trial))
2215 /* See comment in relax_delay_slots about necessity of using
2216 next_real_insn here. */
2217 rtx_insn *new_label = next_real_insn (next_trial);
2219 if (new_label != 0)
2220 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2221 else
2222 new_label = find_end_label (simple_return_rtx);
2224 if (new_label)
2226 add_to_delay_list (copy_delay_slot_insn (next_trial),
2227 &delay_list);
2228 slots_filled++;
2229 reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2230 new_label);
2235 /* If this is an unconditional jump, then try to get insns from the
2236 target of the jump. */
2237 rtx_jump_insn *jump_insn;
2238 if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2239 && simplejump_p (jump_insn)
2240 && slots_filled != slots_to_fill)
2241 fill_slots_from_thread (jump_insn, const_true_rtx,
2242 next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2243 NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2244 JUMP_LABEL (insn), 0),
2245 slots_to_fill, &slots_filled, &delay_list);
2247 if (!delay_list.is_empty ())
2248 unfilled_slots_base[i]
2249 = emit_delay_sequence (insn, delay_list, slots_filled);
2251 if (slots_to_fill == slots_filled)
2252 unfilled_slots_base[i] = 0;
2254 note_delay_statistics (slots_filled, 0);
2258 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2259 return the ultimate label reached by any such chain of jumps.
2260 Return a suitable return rtx if the chain ultimately leads to a
2261 return instruction.
2262 If LABEL is not followed by a jump, return LABEL.
2263 If the chain loops or we can't find end, return LABEL,
2264 since that tells caller to avoid changing the insn.
2265 If the returned label is obtained by following a crossing jump,
2266 set *CROSSING to true, otherwise set it to false. */
2268 static rtx
2269 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2271 rtx_insn *insn;
2272 rtx_insn *next;
2273 int depth;
2275 *crossing = false;
2276 if (ANY_RETURN_P (label))
2277 return label;
2279 rtx_insn *value = as_a <rtx_insn *> (label);
2281 for (depth = 0;
2282 (depth < 10
2283 && (insn = next_active_insn (value)) != 0
2284 && JUMP_P (insn)
2285 && JUMP_LABEL (insn) != NULL_RTX
2286 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2287 || ANY_RETURN_P (PATTERN (insn)))
2288 && (next = NEXT_INSN (insn))
2289 && BARRIER_P (next));
2290 depth++)
2292 rtx this_label_or_return = JUMP_LABEL (insn);
2294 /* If we have found a cycle, make the insn jump to itself. */
2295 if (this_label_or_return == label)
2296 return label;
2298 /* Cannot follow returns and cannot look through tablejumps. */
2299 if (ANY_RETURN_P (this_label_or_return))
2300 return this_label_or_return;
2302 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2303 if (NEXT_INSN (this_label)
2304 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2305 break;
2307 if (!targetm.can_follow_jump (jump, insn))
2308 break;
2309 if (!*crossing)
2310 *crossing = CROSSING_JUMP_P (jump);
2311 value = this_label;
2313 if (depth == 10)
2314 return label;
2315 return value;
2318 /* Try to find insns to place in delay slots.
2320 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2321 or is an unconditional branch if CONDITION is const_true_rtx.
2322 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2324 THREAD is a flow-of-control, either the insns to be executed if the
2325 branch is true or if the branch is false, THREAD_IF_TRUE says which.
2327 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2328 to see if any potential delay slot insns set things needed there.
2330 LIKELY is nonzero if it is extremely likely that the branch will be
2331 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2332 end of a loop back up to the top.
2334 OWN_THREAD and OWN_OPPOSITE_THREAD are true if we are the only user of the
2335 thread. I.e., it is the fallthrough code of our jump or the target of the
2336 jump when we are the only jump going there.
2338 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2339 case, we can only take insns from the head of the thread for our delay
2340 slot. We then adjust the jump to point after the insns we have taken. */
2342 static void
2343 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2344 rtx thread_or_return, rtx opposite_thread, int likely,
2345 int thread_if_true, int own_thread, int slots_to_fill,
2346 int *pslots_filled, vec<rtx_insn *> *delay_list)
2348 rtx new_thread;
2349 struct resources opposite_needed, set, needed;
2350 rtx_insn *trial;
2351 int lose = 0;
2352 int must_annul = 0;
2353 int flags;
2355 /* Validate our arguments. */
2356 gcc_assert (condition != const_true_rtx || thread_if_true);
2357 gcc_assert (own_thread || thread_if_true);
2359 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2361 /* If our thread is the end of subroutine, we can't get any delay
2362 insns from that. */
2363 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2364 return;
2366 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2368 /* If this is an unconditional branch, nothing is needed at the
2369 opposite thread. Otherwise, compute what is needed there. */
2370 if (condition == const_true_rtx)
2371 CLEAR_RESOURCE (&opposite_needed);
2372 else
2373 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2375 /* If the insn at THREAD can be split, do it here to avoid having to
2376 update THREAD and NEW_THREAD if it is done in the loop below. Also
2377 initialize NEW_THREAD. */
2379 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2381 /* Scan insns at THREAD. We are looking for an insn that can be removed
2382 from THREAD (it neither sets nor references resources that were set
2383 ahead of it and it doesn't set anything needs by the insns ahead of
2384 it) and that either can be placed in an annulling insn or aren't
2385 needed at OPPOSITE_THREAD. */
2387 CLEAR_RESOURCE (&needed);
2388 CLEAR_RESOURCE (&set);
2390 /* If we do not own this thread, we must stop as soon as we find
2391 something that we can't put in a delay slot, since all we can do
2392 is branch into THREAD at a later point. Therefore, labels stop
2393 the search if this is not the `true' thread. */
2395 for (trial = thread;
2396 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2397 trial = next_nonnote_insn (trial))
2399 rtx pat, old_trial;
2401 /* If we have passed a label, we no longer own this thread. */
2402 if (LABEL_P (trial))
2404 own_thread = 0;
2405 continue;
2408 pat = PATTERN (trial);
2409 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2410 continue;
2412 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2413 don't separate or copy insns that set and use CC0. */
2414 if (! insn_references_resource_p (trial, &set, true)
2415 && ! insn_sets_resource_p (trial, &set, true)
2416 && ! insn_sets_resource_p (trial, &needed, true)
2417 && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
2418 && (! own_thread || ! sets_cc0_p (pat)))))
2419 && ! can_throw_internal (trial))
2421 rtx_insn *prior_insn;
2423 /* If TRIAL is redundant with some insn before INSN, we don't
2424 actually need to add it to the delay list; we can merely pretend
2425 we did. */
2426 if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2428 fix_reg_dead_note (prior_insn, insn);
2429 if (own_thread)
2431 update_block (trial, thread);
2432 if (trial == thread)
2434 thread = next_active_insn (thread);
2435 if (new_thread == trial)
2436 new_thread = thread;
2439 delete_related_insns (trial);
2441 else
2443 update_reg_unused_notes (prior_insn, trial);
2444 new_thread = next_active_insn (trial);
2447 continue;
2450 /* There are two ways we can win: If TRIAL doesn't set anything
2451 needed at the opposite thread and can't trap, or if it can
2452 go into an annulled delay slot. But we want neither to copy
2453 nor to speculate frame-related insns. */
2454 if (!must_annul
2455 && ((condition == const_true_rtx
2456 && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2457 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2458 && ! may_trap_or_fault_p (pat)
2459 && ! RTX_FRAME_RELATED_P (trial))))
2461 old_trial = trial;
2462 trial = try_split (pat, trial, 0);
2463 if (new_thread == old_trial)
2464 new_thread = trial;
2465 if (thread == old_trial)
2466 thread = trial;
2467 pat = PATTERN (trial);
2468 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2469 goto winner;
2471 else if (!RTX_FRAME_RELATED_P (trial)
2472 && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2473 || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2475 old_trial = trial;
2476 trial = try_split (pat, trial, 0);
2477 if (new_thread == old_trial)
2478 new_thread = trial;
2479 if (thread == old_trial)
2480 thread = trial;
2481 pat = PATTERN (trial);
2482 if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2483 ? check_annul_list_true_false (0, *delay_list)
2484 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2485 : check_annul_list_true_false (1, *delay_list)
2486 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2488 rtx_insn *temp;
2490 must_annul = 1;
2491 winner:
2493 if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, pat))
2494 link_cc0_insns (trial);
2496 /* If we own this thread, delete the insn. If this is the
2497 destination of a branch, show that a basic block status
2498 may have been updated. In any case, mark the new
2499 starting point of this thread. */
2500 if (own_thread)
2502 rtx note;
2504 update_block (trial, thread);
2505 if (trial == thread)
2507 thread = next_active_insn (thread);
2508 if (new_thread == trial)
2509 new_thread = thread;
2512 /* We are moving this insn, not deleting it. We must
2513 temporarily increment the use count on any referenced
2514 label lest it be deleted by delete_related_insns. */
2515 for (note = REG_NOTES (trial);
2516 note != NULL_RTX;
2517 note = XEXP (note, 1))
2518 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2519 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2521 /* REG_LABEL_OPERAND could be
2522 NOTE_INSN_DELETED_LABEL too. */
2523 if (LABEL_P (XEXP (note, 0)))
2524 LABEL_NUSES (XEXP (note, 0))++;
2525 else
2526 gcc_assert (REG_NOTE_KIND (note)
2527 == REG_LABEL_OPERAND);
2529 if (jump_to_label_p (trial))
2530 LABEL_NUSES (JUMP_LABEL (trial))++;
2532 delete_related_insns (trial);
2534 for (note = REG_NOTES (trial);
2535 note != NULL_RTX;
2536 note = XEXP (note, 1))
2537 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2538 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2540 /* REG_LABEL_OPERAND could be
2541 NOTE_INSN_DELETED_LABEL too. */
2542 if (LABEL_P (XEXP (note, 0)))
2543 LABEL_NUSES (XEXP (note, 0))--;
2544 else
2545 gcc_assert (REG_NOTE_KIND (note)
2546 == REG_LABEL_OPERAND);
2548 if (jump_to_label_p (trial))
2549 LABEL_NUSES (JUMP_LABEL (trial))--;
2551 else
2552 new_thread = next_active_insn (trial);
2554 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2555 if (thread_if_true)
2556 INSN_FROM_TARGET_P (temp) = 1;
2558 add_to_delay_list (temp, delay_list);
2560 if (slots_to_fill == ++(*pslots_filled))
2562 /* Even though we have filled all the slots, we
2563 may be branching to a location that has a
2564 redundant insn. Skip any if so. */
2565 while (new_thread && ! own_thread
2566 && ! insn_sets_resource_p (new_thread, &set, true)
2567 && ! insn_sets_resource_p (new_thread, &needed,
2568 true)
2569 && ! insn_references_resource_p (new_thread,
2570 &set, true)
2571 && (prior_insn
2572 = redundant_insn (new_thread, insn,
2573 *delay_list)))
2575 /* We know we do not own the thread, so no need
2576 to call update_block and delete_insn. */
2577 fix_reg_dead_note (prior_insn, insn);
2578 update_reg_unused_notes (prior_insn, new_thread);
2579 new_thread
2580 = next_active_insn (as_a<rtx_insn *> (new_thread));
2582 break;
2585 continue;
2590 /* This insn can't go into a delay slot. */
2591 lose = 1;
2592 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2593 mark_referenced_resources (trial, &needed, true);
2595 /* Ensure we don't put insns between the setting of cc and the comparison
2596 by moving a setting of cc into an earlier delay slot since these insns
2597 could clobber the condition code. */
2598 set.cc = 1;
2600 /* If this insn is a register-register copy and the next insn has
2601 a use of our destination, change it to use our source. That way,
2602 it will become a candidate for our delay slot the next time
2603 through this loop. This case occurs commonly in loops that
2604 scan a list.
2606 We could check for more complex cases than those tested below,
2607 but it doesn't seem worth it. It might also be a good idea to try
2608 to swap the two insns. That might do better.
2610 We can't do this if the next insn modifies our destination, because
2611 that would make the replacement into the insn invalid. We also can't
2612 do this if it modifies our source, because it might be an earlyclobber
2613 operand. This latter test also prevents updating the contents of
2614 a PRE_INC. We also can't do this if there's overlap of source and
2615 destination. Overlap may happen for larger-than-register-size modes. */
2617 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2618 && REG_P (SET_SRC (pat))
2619 && REG_P (SET_DEST (pat))
2620 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2622 rtx_insn *next = next_nonnote_insn (trial);
2624 if (next && NONJUMP_INSN_P (next)
2625 && GET_CODE (PATTERN (next)) != USE
2626 && ! reg_set_p (SET_DEST (pat), next)
2627 && ! reg_set_p (SET_SRC (pat), next)
2628 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2629 && ! modified_in_p (SET_DEST (pat), next))
2630 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2634 /* If we stopped on a branch insn that has delay slots, see if we can
2635 steal some of the insns in those slots. */
2636 if (trial && NONJUMP_INSN_P (trial)
2637 && GET_CODE (PATTERN (trial)) == SEQUENCE
2638 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2640 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2641 /* If this is the `true' thread, we will want to follow the jump,
2642 so we can only do this if we have taken everything up to here. */
2643 if (thread_if_true && trial == new_thread)
2645 steal_delay_list_from_target (insn, condition, sequence,
2646 delay_list, &set, &needed,
2647 &opposite_needed, slots_to_fill,
2648 pslots_filled, &must_annul,
2649 &new_thread);
2650 /* If we owned the thread and are told that it branched
2651 elsewhere, make sure we own the thread at the new location. */
2652 if (own_thread && trial != new_thread)
2653 own_thread = own_thread_p (new_thread, new_thread, 0);
2655 else if (! thread_if_true)
2656 steal_delay_list_from_fallthrough (insn, condition, sequence,
2657 delay_list, &set, &needed,
2658 &opposite_needed, slots_to_fill,
2659 pslots_filled, &must_annul);
2662 /* If we haven't found anything for this delay slot and it is very
2663 likely that the branch will be taken, see if the insn at our target
2664 increments or decrements a register with an increment that does not
2665 depend on the destination register. If so, try to place the opposite
2666 arithmetic insn after the jump insn and put the arithmetic insn in the
2667 delay slot. If we can't do this, return. */
2668 if (delay_list->is_empty () && likely
2669 && new_thread && !ANY_RETURN_P (new_thread)
2670 && NONJUMP_INSN_P (new_thread)
2671 && !RTX_FRAME_RELATED_P (new_thread)
2672 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2673 && asm_noperands (PATTERN (new_thread)) < 0)
2675 rtx pat = PATTERN (new_thread);
2676 rtx dest;
2677 rtx src;
2679 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2680 above. */
2681 trial = as_a <rtx_insn *> (new_thread);
2682 pat = PATTERN (trial);
2684 if (!NONJUMP_INSN_P (trial)
2685 || GET_CODE (pat) != SET
2686 || ! eligible_for_delay (insn, 0, trial, flags)
2687 || can_throw_internal (trial))
2688 return;
2690 dest = SET_DEST (pat), src = SET_SRC (pat);
2691 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2692 && rtx_equal_p (XEXP (src, 0), dest)
2693 && (!FLOAT_MODE_P (GET_MODE (src))
2694 || flag_unsafe_math_optimizations)
2695 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2696 && ! side_effects_p (pat))
2698 rtx other = XEXP (src, 1);
2699 rtx new_arith;
2700 rtx_insn *ninsn;
2702 /* If this is a constant adjustment, use the same code with
2703 the negated constant. Otherwise, reverse the sense of the
2704 arithmetic. */
2705 if (CONST_INT_P (other))
2706 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2707 negate_rtx (GET_MODE (src), other));
2708 else
2709 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2710 GET_MODE (src), dest, other);
2712 ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2714 if (recog_memoized (ninsn) < 0
2715 || (extract_insn (ninsn),
2716 !constrain_operands (1, get_preferred_alternatives (ninsn))))
2718 delete_related_insns (ninsn);
2719 return;
2722 if (own_thread)
2724 update_block (trial, thread);
2725 if (trial == thread)
2727 thread = next_active_insn (thread);
2728 if (new_thread == trial)
2729 new_thread = thread;
2731 delete_related_insns (trial);
2733 else
2734 new_thread = next_active_insn (trial);
2736 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2737 if (thread_if_true)
2738 INSN_FROM_TARGET_P (ninsn) = 1;
2740 add_to_delay_list (ninsn, delay_list);
2741 (*pslots_filled)++;
2745 if (!delay_list->is_empty () && must_annul)
2746 INSN_ANNULLED_BRANCH_P (insn) = 1;
2748 /* If we are to branch into the middle of this thread, find an appropriate
2749 label or make a new one if none, and redirect INSN to it. If we hit the
2750 end of the function, use the end-of-function label. */
2751 if (new_thread != thread)
2753 rtx label;
2754 bool crossing = false;
2756 gcc_assert (thread_if_true);
2758 if (new_thread && simplejump_or_return_p (new_thread)
2759 && redirect_with_delay_list_safe_p (insn,
2760 JUMP_LABEL (new_thread),
2761 *delay_list))
2762 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2763 &crossing);
2765 if (ANY_RETURN_P (new_thread))
2766 label = find_end_label (new_thread);
2767 else if (LABEL_P (new_thread))
2768 label = new_thread;
2769 else
2770 label = get_label_before (as_a <rtx_insn *> (new_thread),
2771 JUMP_LABEL (insn));
2773 if (label)
2775 reorg_redirect_jump (insn, label);
2776 if (crossing)
2777 CROSSING_JUMP_P (insn) = 1;
2782 /* Make another attempt to find insns to place in delay slots.
2784 We previously looked for insns located in front of the delay insn
2785 and, for non-jump delay insns, located behind the delay insn.
2787 Here only try to schedule jump insns and try to move insns from either
2788 the target or the following insns into the delay slot. If annulling is
2789 supported, we will be likely to do this. Otherwise, we can do this only
2790 if safe. */
2792 static void
2793 fill_eager_delay_slots (void)
2795 rtx_insn *insn;
2796 int i;
2797 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2799 for (i = 0; i < num_unfilled_slots; i++)
2801 rtx condition;
2802 rtx target_label, insn_at_target;
2803 rtx_insn *fallthrough_insn;
2804 auto_vec<rtx_insn *, 5> delay_list;
2805 rtx_jump_insn *jump_insn;
2806 int own_target;
2807 int own_fallthrough;
2808 int prediction, slots_to_fill, slots_filled;
2810 insn = unfilled_slots_base[i];
2811 if (insn == 0
2812 || insn->deleted ()
2813 || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2814 || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2815 continue;
2817 slots_to_fill = num_delay_slots (jump_insn);
2818 /* Some machine description have defined instructions to have
2819 delay slots only in certain circumstances which may depend on
2820 nearby insns (which change due to reorg's actions).
2822 For example, the PA port normally has delay slots for unconditional
2823 jumps.
2825 However, the PA port claims such jumps do not have a delay slot
2826 if they are immediate successors of certain CALL_INSNs. This
2827 allows the port to favor filling the delay slot of the call with
2828 the unconditional jump. */
2829 if (slots_to_fill == 0)
2830 continue;
2832 slots_filled = 0;
2833 target_label = JUMP_LABEL (jump_insn);
2834 condition = get_branch_condition (jump_insn, target_label);
2836 if (condition == 0)
2837 continue;
2839 /* Get the next active fallthrough and target insns and see if we own
2840 them. Then see whether the branch is likely true. We don't need
2841 to do a lot of this for unconditional branches. */
2843 insn_at_target = first_active_target_insn (target_label);
2844 own_target = own_thread_p (target_label, target_label, 0);
2846 if (condition == const_true_rtx)
2848 own_fallthrough = 0;
2849 fallthrough_insn = 0;
2850 prediction = 2;
2852 else
2854 fallthrough_insn = next_active_insn (jump_insn);
2855 own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2856 prediction = mostly_true_jump (jump_insn);
2859 /* If this insn is expected to branch, first try to get insns from our
2860 target, then our fallthrough insns. If it is not expected to branch,
2861 try the other order. */
2863 if (prediction > 0)
2865 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2866 fallthrough_insn, prediction == 2, 1,
2867 own_target,
2868 slots_to_fill, &slots_filled, &delay_list);
2870 if (delay_list.is_empty () && own_fallthrough)
2872 /* Even though we didn't find anything for delay slots,
2873 we might have found a redundant insn which we deleted
2874 from the thread that was filled. So we have to recompute
2875 the next insn at the target. */
2876 target_label = JUMP_LABEL (jump_insn);
2877 insn_at_target = first_active_target_insn (target_label);
2879 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2880 insn_at_target, 0, 0, own_fallthrough,
2881 slots_to_fill, &slots_filled,
2882 &delay_list);
2885 else
2887 if (own_fallthrough)
2888 fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2889 insn_at_target, 0, 0, own_fallthrough,
2890 slots_to_fill, &slots_filled, &delay_list);
2892 if (delay_list.is_empty ())
2893 fill_slots_from_thread (jump_insn, condition, insn_at_target,
2894 next_active_insn (insn), 0, 1, own_target,
2895 slots_to_fill, &slots_filled, &delay_list);
2898 if (!delay_list.is_empty ())
2899 unfilled_slots_base[i]
2900 = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2902 if (slots_to_fill == slots_filled)
2903 unfilled_slots_base[i] = 0;
2905 note_delay_statistics (slots_filled, 1);
2909 static void delete_computation (rtx_insn *insn);
2911 /* Recursively delete prior insns that compute the value (used only by INSN
2912 which the caller is deleting) stored in the register mentioned by NOTE
2913 which is a REG_DEAD note associated with INSN. */
2915 static void
2916 delete_prior_computation (rtx note, rtx_insn *insn)
2918 rtx_insn *our_prev;
2919 rtx reg = XEXP (note, 0);
2921 for (our_prev = prev_nonnote_insn (insn);
2922 our_prev && (NONJUMP_INSN_P (our_prev)
2923 || CALL_P (our_prev));
2924 our_prev = prev_nonnote_insn (our_prev))
2926 rtx pat = PATTERN (our_prev);
2928 /* If we reach a CALL which is not calling a const function
2929 or the callee pops the arguments, then give up. */
2930 if (CALL_P (our_prev)
2931 && (! RTL_CONST_CALL_P (our_prev)
2932 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2933 break;
2935 /* If we reach a SEQUENCE, it is too complex to try to
2936 do anything with it, so give up. We can be run during
2937 and after reorg, so SEQUENCE rtl can legitimately show
2938 up here. */
2939 if (GET_CODE (pat) == SEQUENCE)
2940 break;
2942 if (GET_CODE (pat) == USE
2943 && NONJUMP_INSN_P (XEXP (pat, 0)))
2944 /* reorg creates USEs that look like this. We leave them
2945 alone because reorg needs them for its own purposes. */
2946 break;
2948 if (reg_set_p (reg, pat))
2950 if (side_effects_p (pat) && !CALL_P (our_prev))
2951 break;
2953 if (GET_CODE (pat) == PARALLEL)
2955 /* If we find a SET of something else, we can't
2956 delete the insn. */
2958 int i;
2960 for (i = 0; i < XVECLEN (pat, 0); i++)
2962 rtx part = XVECEXP (pat, 0, i);
2964 if (GET_CODE (part) == SET
2965 && SET_DEST (part) != reg)
2966 break;
2969 if (i == XVECLEN (pat, 0))
2970 delete_computation (our_prev);
2972 else if (GET_CODE (pat) == SET
2973 && REG_P (SET_DEST (pat)))
2975 int dest_regno = REGNO (SET_DEST (pat));
2976 int dest_endregno = END_REGNO (SET_DEST (pat));
2977 int regno = REGNO (reg);
2978 int endregno = END_REGNO (reg);
2980 if (dest_regno >= regno
2981 && dest_endregno <= endregno)
2982 delete_computation (our_prev);
2984 /* We may have a multi-word hard register and some, but not
2985 all, of the words of the register are needed in subsequent
2986 insns. Write REG_UNUSED notes for those parts that were not
2987 needed. */
2988 else if (dest_regno <= regno
2989 && dest_endregno >= endregno)
2991 int i;
2993 add_reg_note (our_prev, REG_UNUSED, reg);
2995 for (i = dest_regno; i < dest_endregno; i++)
2996 if (! find_regno_note (our_prev, REG_UNUSED, i))
2997 break;
2999 if (i == dest_endregno)
3000 delete_computation (our_prev);
3004 break;
3007 /* If PAT references the register that dies here, it is an
3008 additional use. Hence any prior SET isn't dead. However, this
3009 insn becomes the new place for the REG_DEAD note. */
3010 if (reg_overlap_mentioned_p (reg, pat))
3012 XEXP (note, 1) = REG_NOTES (our_prev);
3013 REG_NOTES (our_prev) = note;
3014 break;
3019 /* Delete INSN and recursively delete insns that compute values used only
3020 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3022 Look at all our REG_DEAD notes. If a previous insn does nothing other
3023 than set a register that dies in this insn, we can delete that insn
3024 as well.
3026 On machines with CC0, if CC0 is used in this insn, we may be able to
3027 delete the insn that set it. */
3029 static void
3030 delete_computation (rtx_insn *insn)
3032 rtx note, next;
3034 if (HAVE_cc0 && reg_referenced_p (cc0_rtx, PATTERN (insn)))
3036 rtx_insn *prev = prev_nonnote_insn (insn);
3037 /* We assume that at this stage
3038 CC's are always set explicitly
3039 and always immediately before the jump that
3040 will use them. So if the previous insn
3041 exists to set the CC's, delete it
3042 (unless it performs auto-increments, etc.). */
3043 if (prev && NONJUMP_INSN_P (prev)
3044 && sets_cc0_p (PATTERN (prev)))
3046 if (sets_cc0_p (PATTERN (prev)) > 0
3047 && ! side_effects_p (PATTERN (prev)))
3048 delete_computation (prev);
3049 else
3050 /* Otherwise, show that cc0 won't be used. */
3051 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3055 for (note = REG_NOTES (insn); note; note = next)
3057 next = XEXP (note, 1);
3059 if (REG_NOTE_KIND (note) != REG_DEAD
3060 /* Verify that the REG_NOTE is legitimate. */
3061 || !REG_P (XEXP (note, 0)))
3062 continue;
3064 delete_prior_computation (note, insn);
3067 delete_related_insns (insn);
3070 /* If all INSN does is set the pc, delete it,
3071 and delete the insn that set the condition codes for it
3072 if that's what the previous thing was. */
3074 static void
3075 delete_jump (rtx_insn *insn)
3077 rtx set = single_set (insn);
3079 if (set && GET_CODE (SET_DEST (set)) == PC)
3080 delete_computation (insn);
3083 static rtx_insn *
3084 label_before_next_insn (rtx_insn *x, rtx scan_limit)
3086 rtx_insn *insn = next_active_insn (x);
3087 while (insn)
3089 insn = PREV_INSN (insn);
3090 if (insn == scan_limit || insn == NULL_RTX)
3091 return NULL;
3092 if (LABEL_P (insn))
3093 break;
3095 return insn;
3098 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3099 BEG and END. */
3101 static bool
3102 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3104 const rtx_insn *p;
3105 for (p = beg; p != end; p = NEXT_INSN (p))
3106 if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3107 return true;
3108 return false;
3112 /* Once we have tried two ways to fill a delay slot, make a pass over the
3113 code to try to improve the results and to do such things as more jump
3114 threading. */
3116 static void
3117 relax_delay_slots (rtx_insn *first)
3119 rtx_insn *insn, *next;
3120 rtx_sequence *pat;
3121 rtx_insn *delay_insn;
3122 rtx target_label;
3124 /* Look at every JUMP_INSN and see if we can improve it. */
3125 for (insn = first; insn; insn = next)
3127 rtx_insn *other;
3128 bool crossing;
3130 next = next_active_insn (insn);
3132 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3133 the next insn, or jumps to a label that is not the last of a
3134 group of consecutive labels. */
3135 if (is_a <rtx_jump_insn *> (insn)
3136 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3137 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3139 rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3140 target_label
3141 = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3142 &crossing));
3143 if (ANY_RETURN_P (target_label))
3144 target_label = find_end_label (target_label);
3146 if (target_label
3147 && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3148 && ! condjump_in_parallel_p (jump_insn)
3149 && ! (next && switch_text_sections_between_p (jump_insn, next)))
3151 delete_jump (jump_insn);
3152 continue;
3155 if (target_label && target_label != JUMP_LABEL (jump_insn))
3157 reorg_redirect_jump (jump_insn, target_label);
3158 if (crossing)
3159 CROSSING_JUMP_P (jump_insn) = 1;
3162 /* See if this jump conditionally branches around an unconditional
3163 jump. If so, invert this jump and point it to the target of the
3164 second jump. Check if it's possible on the target. */
3165 if (next && simplejump_or_return_p (next)
3166 && any_condjump_p (jump_insn)
3167 && target_label
3168 && (next_active_insn (as_a<rtx_insn *> (target_label))
3169 == next_active_insn (next))
3170 && no_labels_between_p (jump_insn, next)
3171 && targetm.can_follow_jump (jump_insn, next))
3173 rtx label = JUMP_LABEL (next);
3175 /* Be careful how we do this to avoid deleting code or
3176 labels that are momentarily dead. See similar optimization
3177 in jump.c.
3179 We also need to ensure we properly handle the case when
3180 invert_jump fails. */
3182 ++LABEL_NUSES (target_label);
3183 if (!ANY_RETURN_P (label))
3184 ++LABEL_NUSES (label);
3186 if (invert_jump (jump_insn, label, 1))
3188 delete_related_insns (next);
3189 next = jump_insn;
3192 if (!ANY_RETURN_P (label))
3193 --LABEL_NUSES (label);
3195 if (--LABEL_NUSES (target_label) == 0)
3196 delete_related_insns (target_label);
3198 continue;
3202 /* If this is an unconditional jump and the previous insn is a
3203 conditional jump, try reversing the condition of the previous
3204 insn and swapping our targets. The next pass might be able to
3205 fill the slots.
3207 Don't do this if we expect the conditional branch to be true, because
3208 we would then be making the more common case longer. */
3210 if (simplejump_or_return_p (insn)
3211 && (other = prev_active_insn (insn)) != 0
3212 && any_condjump_p (other)
3213 && no_labels_between_p (other, insn)
3214 && 0 > mostly_true_jump (other))
3216 rtx other_target = JUMP_LABEL (other);
3217 target_label = JUMP_LABEL (insn);
3219 if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3220 reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3223 /* Now look only at cases where we have a filled delay slot. */
3224 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3225 continue;
3227 pat = as_a <rtx_sequence *> (PATTERN (insn));
3228 delay_insn = pat->insn (0);
3230 /* See if the first insn in the delay slot is redundant with some
3231 previous insn. Remove it from the delay slot if so; then set up
3232 to reprocess this insn. */
3233 if (redundant_insn (pat->insn (1), delay_insn, vNULL))
3235 update_block (pat->insn (1), insn);
3236 delete_from_delay_slot (pat->insn (1));
3237 next = prev_active_insn (next);
3238 continue;
3241 /* See if we have a RETURN insn with a filled delay slot followed
3242 by a RETURN insn with an unfilled a delay slot. If so, we can delete
3243 the first RETURN (but not its delay insn). This gives the same
3244 effect in fewer instructions.
3246 Only do so if optimizing for size since this results in slower, but
3247 smaller code. */
3248 if (optimize_function_for_size_p (cfun)
3249 && ANY_RETURN_P (PATTERN (delay_insn))
3250 && next
3251 && JUMP_P (next)
3252 && PATTERN (next) == PATTERN (delay_insn))
3254 rtx_insn *after;
3255 int i;
3257 /* Delete the RETURN and just execute the delay list insns.
3259 We do this by deleting the INSN containing the SEQUENCE, then
3260 re-emitting the insns separately, and then deleting the RETURN.
3261 This allows the count of the jump target to be properly
3262 decremented.
3264 Note that we need to change the INSN_UID of the re-emitted insns
3265 since it is used to hash the insns for mark_target_live_regs and
3266 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3268 Clear the from target bit, since these insns are no longer
3269 in delay slots. */
3270 for (i = 0; i < XVECLEN (pat, 0); i++)
3271 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3273 rtx_insn *prev = PREV_INSN (insn);
3274 delete_related_insns (insn);
3275 gcc_assert (GET_CODE (pat) == SEQUENCE);
3276 add_insn_after (delay_insn, prev, NULL);
3277 after = delay_insn;
3278 for (i = 1; i < pat->len (); i++)
3279 after = emit_copy_of_insn_after (pat->insn (i), after);
3280 delete_scheduled_jump (delay_insn);
3281 continue;
3284 /* Now look only at the cases where we have a filled JUMP_INSN. */
3285 rtx_jump_insn *delay_jump_insn =
3286 dyn_cast <rtx_jump_insn *> (delay_insn);
3287 if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3288 || condjump_in_parallel_p (delay_jump_insn)))
3289 continue;
3291 target_label = JUMP_LABEL (delay_jump_insn);
3292 if (target_label && ANY_RETURN_P (target_label))
3293 continue;
3295 /* If this jump goes to another unconditional jump, thread it, but
3296 don't convert a jump into a RETURN here. */
3297 rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3298 delay_jump_insn,
3299 &crossing));
3300 if (ANY_RETURN_P (trial))
3301 trial = find_end_label (trial);
3303 if (trial && trial != target_label
3304 && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3306 reorg_redirect_jump (delay_jump_insn, trial);
3307 target_label = trial;
3308 if (crossing)
3309 CROSSING_JUMP_P (delay_jump_insn) = 1;
3312 /* If the first insn at TARGET_LABEL is redundant with a previous
3313 insn, redirect the jump to the following insn and process again.
3314 We use next_real_insn instead of next_active_insn so we
3315 don't skip USE-markers, or we'll end up with incorrect
3316 liveness info. */
3317 trial = next_real_insn (target_label);
3318 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3319 && redundant_insn (trial, insn, vNULL)
3320 && ! can_throw_internal (trial))
3322 /* Figure out where to emit the special USE insn so we don't
3323 later incorrectly compute register live/death info. */
3324 rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3325 if (tmp == 0)
3326 tmp = find_end_label (simple_return_rtx);
3328 if (tmp)
3330 /* Insert the special USE insn and update dataflow info.
3331 We know "trial" is an insn here as it is the output of
3332 next_real_insn () above. */
3333 update_block (as_a <rtx_insn *> (trial), tmp);
3335 /* Now emit a label before the special USE insn, and
3336 redirect our jump to the new label. */
3337 target_label = get_label_before (PREV_INSN (tmp), target_label);
3338 reorg_redirect_jump (delay_jump_insn, target_label);
3339 next = insn;
3340 continue;
3344 /* Similarly, if it is an unconditional jump with one insn in its
3345 delay list and that insn is redundant, thread the jump. */
3346 rtx_sequence *trial_seq =
3347 trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3348 if (trial_seq
3349 && trial_seq->len () == 2
3350 && JUMP_P (trial_seq->insn (0))
3351 && simplejump_or_return_p (trial_seq->insn (0))
3352 && redundant_insn (trial_seq->insn (1), insn, vNULL))
3354 target_label = JUMP_LABEL (trial_seq->insn (0));
3355 if (ANY_RETURN_P (target_label))
3356 target_label = find_end_label (target_label);
3358 if (target_label
3359 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3360 target_label, insn))
3362 update_block (trial_seq->insn (1), insn);
3363 reorg_redirect_jump (delay_jump_insn, target_label);
3364 next = insn;
3365 continue;
3369 /* See if we have a simple (conditional) jump that is useless. */
3370 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3371 && ! condjump_in_parallel_p (delay_jump_insn)
3372 && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3373 && ! BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label)))
3374 /* If the last insn in the delay slot sets CC0 for some insn,
3375 various code assumes that it is in a delay slot. We could
3376 put it back where it belonged and delete the register notes,
3377 but it doesn't seem worthwhile in this uncommon case. */
3378 && (!HAVE_cc0
3379 || ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3380 REG_CC_USER, NULL_RTX)))
3382 rtx_insn *after;
3383 int i;
3385 /* All this insn does is execute its delay list and jump to the
3386 following insn. So delete the jump and just execute the delay
3387 list insns.
3389 We do this by deleting the INSN containing the SEQUENCE, then
3390 re-emitting the insns separately, and then deleting the jump.
3391 This allows the count of the jump target to be properly
3392 decremented.
3394 Note that we need to change the INSN_UID of the re-emitted insns
3395 since it is used to hash the insns for mark_target_live_regs and
3396 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3398 Clear the from target bit, since these insns are no longer
3399 in delay slots. */
3400 for (i = 0; i < XVECLEN (pat, 0); i++)
3401 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3403 rtx_insn *prev = PREV_INSN (insn);
3404 delete_related_insns (insn);
3405 gcc_assert (GET_CODE (pat) == SEQUENCE);
3406 add_insn_after (delay_jump_insn, prev, NULL);
3407 after = delay_jump_insn;
3408 for (i = 1; i < pat->len (); i++)
3409 after = emit_copy_of_insn_after (pat->insn (i), after);
3410 delete_scheduled_jump (delay_jump_insn);
3411 continue;
3414 /* See if this is an unconditional jump around a single insn which is
3415 identical to the one in its delay slot. In this case, we can just
3416 delete the branch and the insn in its delay slot. */
3417 if (next && NONJUMP_INSN_P (next)
3418 && label_before_next_insn (next, insn) == target_label
3419 && simplejump_p (insn)
3420 && XVECLEN (pat, 0) == 2
3421 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3423 delete_related_insns (insn);
3424 continue;
3427 /* See if this jump (with its delay slots) conditionally branches
3428 around an unconditional jump (without delay slots). If so, invert
3429 this jump and point it to the target of the second jump. We cannot
3430 do this for annulled jumps, though. Again, don't convert a jump to
3431 a RETURN here. */
3432 if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3433 && any_condjump_p (delay_jump_insn)
3434 && next && simplejump_or_return_p (next)
3435 && (next_active_insn (as_a<rtx_insn *> (target_label))
3436 == next_active_insn (next))
3437 && no_labels_between_p (insn, next))
3439 rtx label = JUMP_LABEL (next);
3440 rtx old_label = JUMP_LABEL (delay_jump_insn);
3442 if (ANY_RETURN_P (label))
3443 label = find_end_label (label);
3445 /* find_end_label can generate a new label. Check this first. */
3446 if (label
3447 && no_labels_between_p (insn, next)
3448 && redirect_with_delay_slots_safe_p (delay_jump_insn,
3449 label, insn))
3451 /* Be careful how we do this to avoid deleting code or labels
3452 that are momentarily dead. See similar optimization in
3453 jump.c */
3454 if (old_label)
3455 ++LABEL_NUSES (old_label);
3457 if (invert_jump (delay_jump_insn, label, 1))
3459 int i;
3461 /* Must update the INSN_FROM_TARGET_P bits now that
3462 the branch is reversed, so that mark_target_live_regs
3463 will handle the delay slot insn correctly. */
3464 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3466 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3467 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3470 delete_related_insns (next);
3471 next = insn;
3474 if (old_label && --LABEL_NUSES (old_label) == 0)
3475 delete_related_insns (old_label);
3476 continue;
3480 /* If we own the thread opposite the way this insn branches, see if we
3481 can merge its delay slots with following insns. */
3482 if (INSN_FROM_TARGET_P (pat->insn (1))
3483 && own_thread_p (NEXT_INSN (insn), 0, 1))
3484 try_merge_delay_insns (insn, next);
3485 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3486 && own_thread_p (target_label, target_label, 0))
3487 try_merge_delay_insns (insn,
3488 next_active_insn (as_a<rtx_insn *> (target_label)));
3490 /* If we get here, we haven't deleted INSN. But we may have deleted
3491 NEXT, so recompute it. */
3492 next = next_active_insn (insn);
3497 /* Look for filled jumps to the end of function label. We can try to convert
3498 them into RETURN insns if the insns in the delay slot are valid for the
3499 RETURN as well. */
3501 static void
3502 make_return_insns (rtx_insn *first)
3504 rtx_insn *insn;
3505 rtx_jump_insn *jump_insn;
3506 rtx real_return_label = function_return_label;
3507 rtx real_simple_return_label = function_simple_return_label;
3508 int slots, i;
3510 /* See if there is a RETURN insn in the function other than the one we
3511 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3512 into a RETURN to jump to it. */
3513 for (insn = first; insn; insn = NEXT_INSN (insn))
3514 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3516 rtx t = get_label_before (insn, NULL_RTX);
3517 if (PATTERN (insn) == ret_rtx)
3518 real_return_label = t;
3519 else
3520 real_simple_return_label = t;
3521 break;
3524 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3525 was equal to END_OF_FUNCTION_LABEL. */
3526 if (real_return_label)
3527 LABEL_NUSES (real_return_label)++;
3528 if (real_simple_return_label)
3529 LABEL_NUSES (real_simple_return_label)++;
3531 /* Clear the list of insns to fill so we can use it. */
3532 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3534 for (insn = first; insn; insn = NEXT_INSN (insn))
3536 int flags;
3537 rtx kind, real_label;
3539 /* Only look at filled JUMP_INSNs that go to the end of function
3540 label. */
3541 if (!NONJUMP_INSN_P (insn))
3542 continue;
3544 if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3545 continue;
3547 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3549 if (!jump_to_label_p (pat->insn (0)))
3550 continue;
3552 if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3554 kind = ret_rtx;
3555 real_label = real_return_label;
3557 else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3559 kind = simple_return_rtx;
3560 real_label = real_simple_return_label;
3562 else
3563 continue;
3565 jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3567 /* If we can't make the jump into a RETURN, try to redirect it to the best
3568 RETURN and go on to the next insn. */
3569 if (!reorg_redirect_jump (jump_insn, kind))
3571 /* Make sure redirecting the jump will not invalidate the delay
3572 slot insns. */
3573 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3574 reorg_redirect_jump (jump_insn, real_label);
3575 continue;
3578 /* See if this RETURN can accept the insns current in its delay slot.
3579 It can if it has more or an equal number of slots and the contents
3580 of each is valid. */
3582 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3583 slots = num_delay_slots (jump_insn);
3584 if (slots >= XVECLEN (pat, 0) - 1)
3586 for (i = 1; i < XVECLEN (pat, 0); i++)
3587 if (! (
3588 #if ANNUL_IFFALSE_SLOTS
3589 (INSN_ANNULLED_BRANCH_P (jump_insn)
3590 && INSN_FROM_TARGET_P (pat->insn (i)))
3591 ? eligible_for_annul_false (jump_insn, i - 1,
3592 pat->insn (i), flags) :
3593 #endif
3594 #if ANNUL_IFTRUE_SLOTS
3595 (INSN_ANNULLED_BRANCH_P (jump_insn)
3596 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3597 ? eligible_for_annul_true (jump_insn, i - 1,
3598 pat->insn (i), flags) :
3599 #endif
3600 eligible_for_delay (jump_insn, i - 1,
3601 pat->insn (i), flags)))
3602 break;
3604 else
3605 i = 0;
3607 if (i == XVECLEN (pat, 0))
3608 continue;
3610 /* We have to do something with this insn. If it is an unconditional
3611 RETURN, delete the SEQUENCE and output the individual insns,
3612 followed by the RETURN. Then set things up so we try to find
3613 insns for its delay slots, if it needs some. */
3614 if (ANY_RETURN_P (PATTERN (jump_insn)))
3616 rtx_insn *prev = PREV_INSN (insn);
3618 delete_related_insns (insn);
3619 for (i = 1; i < XVECLEN (pat, 0); i++)
3620 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3622 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3623 emit_barrier_after (insn);
3625 if (slots)
3626 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3628 else
3629 /* It is probably more efficient to keep this with its current
3630 delay slot as a branch to a RETURN. */
3631 reorg_redirect_jump (jump_insn, real_label);
3634 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3635 new delay slots we have created. */
3636 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3637 delete_related_insns (real_return_label);
3638 if (real_simple_return_label != NULL_RTX
3639 && --LABEL_NUSES (real_simple_return_label) == 0)
3640 delete_related_insns (real_simple_return_label);
3642 fill_simple_delay_slots (1);
3643 fill_simple_delay_slots (0);
3646 /* Try to find insns to place in delay slots. */
3648 static void
3649 dbr_schedule (rtx_insn *first)
3651 rtx_insn *insn, *next, *epilogue_insn = 0;
3652 int i;
3653 bool need_return_insns;
3655 /* If the current function has no insns other than the prologue and
3656 epilogue, then do not try to fill any delay slots. */
3657 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3658 return;
3660 /* Find the highest INSN_UID and allocate and initialize our map from
3661 INSN_UID's to position in code. */
3662 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3664 if (INSN_UID (insn) > max_uid)
3665 max_uid = INSN_UID (insn);
3666 if (NOTE_P (insn)
3667 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3668 epilogue_insn = insn;
3671 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3672 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3673 uid_to_ruid[INSN_UID (insn)] = i;
3675 /* Initialize the list of insns that need filling. */
3676 if (unfilled_firstobj == 0)
3678 gcc_obstack_init (&unfilled_slots_obstack);
3679 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3682 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3684 rtx target;
3686 /* Skip vector tables. We can't get attributes for them. */
3687 if (JUMP_TABLE_DATA_P (insn))
3688 continue;
3690 if (JUMP_P (insn))
3691 INSN_ANNULLED_BRANCH_P (insn) = 0;
3692 INSN_FROM_TARGET_P (insn) = 0;
3694 if (num_delay_slots (insn) > 0)
3695 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3697 /* Ensure all jumps go to the last of a set of consecutive labels. */
3698 if (JUMP_P (insn)
3699 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3700 && !ANY_RETURN_P (JUMP_LABEL (insn))
3701 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3702 != JUMP_LABEL (insn)))
3703 redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3706 init_resource_info (epilogue_insn);
3708 /* Show we haven't computed an end-of-function label yet. */
3709 function_return_label = function_simple_return_label = NULL;
3711 /* Initialize the statistics for this function. */
3712 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3713 memset (num_filled_delays, 0, sizeof num_filled_delays);
3715 /* Now do the delay slot filling. Try everything twice in case earlier
3716 changes make more slots fillable. */
3718 for (reorg_pass_number = 0;
3719 reorg_pass_number < MAX_REORG_PASSES;
3720 reorg_pass_number++)
3722 fill_simple_delay_slots (1);
3723 fill_simple_delay_slots (0);
3724 if (!targetm.no_speculation_in_delay_slots_p ())
3725 fill_eager_delay_slots ();
3726 relax_delay_slots (first);
3729 /* If we made an end of function label, indicate that it is now
3730 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3731 If it is now unused, delete it. */
3732 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3733 delete_related_insns (function_return_label);
3734 if (function_simple_return_label
3735 && --LABEL_NUSES (function_simple_return_label) == 0)
3736 delete_related_insns (function_simple_return_label);
3738 need_return_insns = false;
3739 need_return_insns |= targetm.have_return () && function_return_label != 0;
3740 need_return_insns |= (targetm.have_simple_return ()
3741 && function_simple_return_label != 0);
3742 if (need_return_insns)
3743 make_return_insns (first);
3745 /* Delete any USE insns made by update_block; subsequent passes don't need
3746 them or know how to deal with them. */
3747 for (insn = first; insn; insn = next)
3749 next = NEXT_INSN (insn);
3751 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3752 && INSN_P (XEXP (PATTERN (insn), 0)))
3753 next = delete_related_insns (insn);
3756 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3758 /* It is not clear why the line below is needed, but it does seem to be. */
3759 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3761 if (dump_file)
3763 int i, j, need_comma;
3764 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3765 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3767 for (reorg_pass_number = 0;
3768 reorg_pass_number < MAX_REORG_PASSES;
3769 reorg_pass_number++)
3771 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3772 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3774 need_comma = 0;
3775 fprintf (dump_file, ";; Reorg function #%d\n", i);
3777 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3778 num_insns_needing_delays[i][reorg_pass_number]);
3780 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3781 if (num_filled_delays[i][j][reorg_pass_number])
3783 if (need_comma)
3784 fprintf (dump_file, ", ");
3785 need_comma = 1;
3786 fprintf (dump_file, "%d got %d delays",
3787 num_filled_delays[i][j][reorg_pass_number], j);
3789 fprintf (dump_file, "\n");
3792 memset (total_delay_slots, 0, sizeof total_delay_slots);
3793 memset (total_annul_slots, 0, sizeof total_annul_slots);
3794 for (insn = first; insn; insn = NEXT_INSN (insn))
3796 if (! insn->deleted ()
3797 && NONJUMP_INSN_P (insn)
3798 && GET_CODE (PATTERN (insn)) != USE
3799 && GET_CODE (PATTERN (insn)) != CLOBBER)
3801 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3803 rtx control;
3804 j = XVECLEN (PATTERN (insn), 0) - 1;
3805 if (j > MAX_DELAY_HISTOGRAM)
3806 j = MAX_DELAY_HISTOGRAM;
3807 control = XVECEXP (PATTERN (insn), 0, 0);
3808 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3809 total_annul_slots[j]++;
3810 else
3811 total_delay_slots[j]++;
3813 else if (num_delay_slots (insn) > 0)
3814 total_delay_slots[0]++;
3817 fprintf (dump_file, ";; Reorg totals: ");
3818 need_comma = 0;
3819 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3821 if (total_delay_slots[j])
3823 if (need_comma)
3824 fprintf (dump_file, ", ");
3825 need_comma = 1;
3826 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3829 fprintf (dump_file, "\n");
3831 if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3833 fprintf (dump_file, ";; Reorg annuls: ");
3834 need_comma = 0;
3835 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3837 if (total_annul_slots[j])
3839 if (need_comma)
3840 fprintf (dump_file, ", ");
3841 need_comma = 1;
3842 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3845 fprintf (dump_file, "\n");
3848 fprintf (dump_file, "\n");
3851 if (!sibling_labels.is_empty ())
3853 update_alignments (sibling_labels);
3854 sibling_labels.release ();
3857 free_resource_info ();
3858 free (uid_to_ruid);
3859 crtl->dbr_scheduled_p = true;
3862 /* Run delay slot optimization. */
3863 static unsigned int
3864 rest_of_handle_delay_slots (void)
3866 if (DELAY_SLOTS)
3867 dbr_schedule (get_insns ());
3869 return 0;
3872 namespace {
3874 const pass_data pass_data_delay_slots =
3876 RTL_PASS, /* type */
3877 "dbr", /* name */
3878 OPTGROUP_NONE, /* optinfo_flags */
3879 TV_DBR_SCHED, /* tv_id */
3880 0, /* properties_required */
3881 0, /* properties_provided */
3882 0, /* properties_destroyed */
3883 0, /* todo_flags_start */
3884 0, /* todo_flags_finish */
3887 class pass_delay_slots : public rtl_opt_pass
3889 public:
3890 pass_delay_slots (gcc::context *ctxt)
3891 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3894 /* opt_pass methods: */
3895 virtual bool gate (function *);
3896 virtual unsigned int execute (function *)
3898 return rest_of_handle_delay_slots ();
3901 }; // class pass_delay_slots
3903 bool
3904 pass_delay_slots::gate (function *)
3906 /* At -O0 dataflow info isn't updated after RA. */
3907 if (DELAY_SLOTS)
3908 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3910 return false;
3913 } // anon namespace
3915 rtl_opt_pass *
3916 make_pass_delay_slots (gcc::context *ctxt)
3918 return new pass_delay_slots (ctxt);
3921 /* Machine dependent reorg pass. */
3923 namespace {
3925 const pass_data pass_data_machine_reorg =
3927 RTL_PASS, /* type */
3928 "mach", /* name */
3929 OPTGROUP_NONE, /* optinfo_flags */
3930 TV_MACH_DEP, /* tv_id */
3931 0, /* properties_required */
3932 0, /* properties_provided */
3933 0, /* properties_destroyed */
3934 0, /* todo_flags_start */
3935 0, /* todo_flags_finish */
3938 class pass_machine_reorg : public rtl_opt_pass
3940 public:
3941 pass_machine_reorg (gcc::context *ctxt)
3942 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3945 /* opt_pass methods: */
3946 virtual bool gate (function *)
3948 return targetm.machine_dependent_reorg != 0;
3951 virtual unsigned int execute (function *)
3953 targetm.machine_dependent_reorg ();
3954 return 0;
3957 }; // class pass_machine_reorg
3959 } // anon namespace
3961 rtl_opt_pass *
3962 make_pass_machine_reorg (gcc::context *ctxt)
3964 return new pass_machine_reorg (ctxt);