[official-gcc.git] / gcc / reorg.c
1 /* Perform instruction reorganizations for delay slot filling.
2 Copyright (C) 1992-2014 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4 Hacked by Michael Tiemann (tiemann@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Instruction reorganization pass.
24 This pass runs after register allocation and final jump
25 optimization. It should be the last pass to run before peephole.
26 It serves primarily to fill delay slots of insns, typically branch
27 and call insns. Other insns typically involve more complicated
28 interactions of data dependencies and resource constraints, and
29 are better handled by scheduling before register allocation (by the
30 function `schedule_insns').
32 The Branch Penalty is the number of extra cycles that are needed to
33 execute a branch insn. On an ideal machine, branches take a single
34 cycle, and the Branch Penalty is 0. Several RISC machines approach
35 branch delays differently:
37 The MIPS has a single branch delay slot. Most insns
38 (except other branches) can be used to fill this slot. When the
39 slot is filled, two insns execute in two cycles, reducing the
40 branch penalty to zero.
42 The SPARC always has a branch delay slot, but its effects can be
43 annulled when the branch is not taken. This means that failing to
44 find other sources of insns, we can hoist an insn from the branch
45 target that would only be safe to execute knowing that the branch
46 is taken.
48 The HP-PA always has a branch delay slot. For unconditional branches
49 its effects can be annulled when the branch is taken. The effects
50 of the delay slot in a conditional branch can be nullified for forward
51 taken branches, or for untaken backward branches. This means
52 we can hoist insns from the fall-through path for forward branches or
53 steal insns from the target of backward branches.
55 The TMS320C3x and C4x have three branch delay slots. When the three
56 slots are filled, the branch penalty is zero. Most insns can fill the
57 delay slots except jump insns.
59 Three techniques for filling delay slots have been implemented so far:
61 (1) `fill_simple_delay_slots' is the simplest, most efficient way
62 to fill delay slots. This pass first looks for insns which come
63 from before the branch and which are safe to execute after the
64 branch. Then it searches after the insn requiring delay slots or,
65 in the case of a branch, for insns that are after the point at
66 which the branch merges into the fallthrough code, if such a point
67 exists. When such insns are found, the branch penalty decreases
68 and no code expansion takes place.
70 (2) `fill_eager_delay_slots' is more complicated: it is used for
71 scheduling conditional jumps, or for scheduling jumps which cannot
72 be filled using (1). A machine need not have annulled jumps to use
73 this strategy, but it helps (by keeping more options open).
74 `fill_eager_delay_slots' tries to guess the direction the branch
75 will go; if it guesses right 100% of the time, it can reduce the
76 branch penalty as much as `fill_simple_delay_slots' does. If it
77 guesses wrong 100% of the time, it might as well schedule nops. When
78 `fill_eager_delay_slots' takes insns from the fall-through path of
79 the jump, usually there is no code expansion; when it takes insns
80 from the branch target, there is code expansion if it is not the
81 only way to reach that target.
83 (3) `relax_delay_slots' uses a set of rules to simplify code that
84 has been reorganized by (1) and (2). It finds cases where
 85 a conditional test can be eliminated, jumps can be threaded, extra
86 insns can be eliminated, etc. It is the job of (1) and (2) to do a
87 good job of scheduling locally; `relax_delay_slots' takes care of
88 making the various individual schedules work well together. It is
89 especially tuned to handle the control flow interactions of branch
90 insns. It does nothing for insns with delay slots that do not
91 branch.
93 On machines that use CC0, we are very conservative. We will not make
94 a copy of an insn involving CC0 since we want to maintain a 1-1
95 correspondence between the insn that sets and uses CC0. The insns are
96 allowed to be separated by placing an insn that sets CC0 (but not an insn
97 that uses CC0; we could do this, but it doesn't seem worthwhile) in a
98 delay slot. In that case, we point each insn at the other with REG_CC_USER
99 and REG_CC_SETTER notes. Note that these restrictions affect very few
100 machines because most RISC machines with delay slots will not use CC0
101 (the RT is the only known exception at this point). */
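/* As an illustrative sketch only (MIPS-like assembly with hypothetical
   registers and labels), technique (1) above moves an independent insn
   from before the branch into its delay slot:

       before filling               after filling
         lw   $t0, 0($a0)             beq  $a1, $zero, L2
         beq  $a1, $zero, L2          lw   $t0, 0($a0)    ; delay slot
         nop                        L2:
       L2:

   The load is safe to execute whether or not the branch is taken, so
   the nop disappears and the branch penalty drops to zero.  */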
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "tm.h"
107 #include "diagnostic-core.h"
108 #include "rtl.h"
109 #include "tm_p.h"
110 #include "expr.h"
111 #include "function.h"
112 #include "insn-config.h"
113 #include "conditions.h"
114 #include "hard-reg-set.h"
115 #include "basic-block.h"
116 #include "regs.h"
117 #include "recog.h"
118 #include "flags.h"
119 #include "obstack.h"
120 #include "insn-attr.h"
121 #include "resource.h"
122 #include "except.h"
123 #include "params.h"
124 #include "target.h"
125 #include "tree-pass.h"
126 #include "emit-rtl.h"
128 #ifdef DELAY_SLOTS
130 #ifndef ANNUL_IFTRUE_SLOTS
131 #define eligible_for_annul_true(INSN, SLOTS, TRIAL, FLAGS) 0
132 #endif
133 #ifndef ANNUL_IFFALSE_SLOTS
134 #define eligible_for_annul_false(INSN, SLOTS, TRIAL, FLAGS) 0
135 #endif
138 /* First, some functions that were used before GCC got a control flow graph.
139 These functions are now only used here in reorg.c, and have therefore
140 been moved here to avoid inadvertent misuse elsewhere in the compiler. */
142 /* Return the last label to mark the same position as LABEL. Return LABEL
143 itself if it is null or any return rtx. */
145 static rtx
146 skip_consecutive_labels (rtx label_or_return)
148 rtx_insn *insn;
150 if (label_or_return && ANY_RETURN_P (label_or_return))
151 return label_or_return;
153 rtx_insn *label = as_a <rtx_insn *> (label_or_return);
155 for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
156 if (LABEL_P (insn))
157 label = insn;
159 return label;
162 #ifdef HAVE_cc0
163 /* INSN uses CC0 and is being moved into a delay slot. Set up REG_CC_SETTER
164 and REG_CC_USER notes so we can find it. */
166 static void
167 link_cc0_insns (rtx insn)
169 rtx user = next_nonnote_insn (insn);
171 if (NONJUMP_INSN_P (user) && GET_CODE (PATTERN (user)) == SEQUENCE)
172 user = XVECEXP (PATTERN (user), 0, 0);
174 add_reg_note (user, REG_CC_SETTER, insn);
175 add_reg_note (insn, REG_CC_USER, user);
177 #endif
179 /* Insns which have delay slots that have not yet been filled. */
181 static struct obstack unfilled_slots_obstack;
182 static rtx *unfilled_firstobj;
184 /* Define macros to refer to the first and last slot containing unfilled
185 insns. These are used because the list may move and its address
186 should be recomputed at each use. */
188 #define unfilled_slots_base \
189 ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
191 #define unfilled_slots_next \
192 ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
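/* A minimal usage sketch (the queue/scan below is illustrative, not a
   quote of the filling passes further down): an insn whose slots still
   need filling is queued with

       obstack_ptr_grow (&unfilled_slots_obstack, insn);

   and the filling passes later walk the queue roughly as

       for (rtx_insn **slot = unfilled_slots_base;
            slot < unfilled_slots_next; slot++)
         ... try to fill the delay slots of *slot ...

   Since growing the obstack may move its storage, the base and next
   pointers are recomputed through the macros at every use instead of
   being cached.  */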
194 /* Points to the label before the end of the function, or before a
195 return insn. */
196 static rtx_code_label *function_return_label;
197 /* Likewise for a simple_return. */
198 static rtx_code_label *function_simple_return_label;
200 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
201 not always monotonically increase. */
202 static int *uid_to_ruid;
204 /* Highest valid index in `uid_to_ruid'. */
205 static int max_uid;
207 static int stop_search_p (rtx, int);
208 static int resource_conflicts_p (struct resources *, struct resources *);
209 static int insn_references_resource_p (rtx, struct resources *, bool);
210 static int insn_sets_resource_p (rtx, struct resources *, bool);
211 static rtx_code_label *find_end_label (rtx);
212 static rtx_insn *emit_delay_sequence (rtx_insn *, rtx_insn_list *, int);
213 static rtx_insn_list *add_to_delay_list (rtx_insn *, rtx_insn_list *);
214 static rtx_insn *delete_from_delay_slot (rtx_insn *);
215 static void delete_scheduled_jump (rtx_insn *);
216 static void note_delay_statistics (int, int);
217 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
218 static rtx_insn_list *optimize_skip (rtx_insn *);
219 #endif
220 static int get_jump_flags (rtx, rtx);
221 static int mostly_true_jump (rtx);
222 static rtx get_branch_condition (rtx, rtx);
223 static int condition_dominates_p (rtx, rtx);
224 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
225 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx, rtx_insn_list *);
226 static int check_annul_list_true_false (int, rtx);
227 static rtx_insn_list *steal_delay_list_from_target (rtx_insn *, rtx,
228 rtx_sequence *,
229 rtx_insn_list *,
230 struct resources *,
231 struct resources *,
232 struct resources *,
233 int, int *, int *,
234 rtx *);
235 static rtx_insn_list *steal_delay_list_from_fallthrough (rtx_insn *, rtx,
236 rtx_sequence *,
237 rtx_insn_list *,
238 struct resources *,
239 struct resources *,
240 struct resources *,
241 int, int *, int *);
242 static void try_merge_delay_insns (rtx, rtx_insn *);
243 static rtx redundant_insn (rtx, rtx_insn *, rtx);
244 static int own_thread_p (rtx, rtx, int);
245 static void update_block (rtx_insn *, rtx);
246 static int reorg_redirect_jump (rtx_insn *, rtx);
247 static void update_reg_dead_notes (rtx, rtx);
248 static void fix_reg_dead_note (rtx, rtx);
249 static void update_reg_unused_notes (rtx, rtx);
250 static void fill_simple_delay_slots (int);
251 static rtx_insn_list *fill_slots_from_thread (rtx_insn *, rtx, rtx, rtx,
252 int, int, int, int,
253 int *, rtx_insn_list *);
254 static void fill_eager_delay_slots (void);
255 static void relax_delay_slots (rtx_insn *);
256 static void make_return_insns (rtx_insn *);
258 /* A wrapper around next_active_insn which takes care to return ret_rtx
259 unchanged. */
261 static rtx
262 first_active_target_insn (rtx insn)
264 if (ANY_RETURN_P (insn))
265 return insn;
266 return next_active_insn (as_a <rtx_insn *> (insn));
269 /* Return true iff INSN is a simplejump, or any kind of return insn. */
271 static bool
272 simplejump_or_return_p (rtx insn)
274 return (JUMP_P (insn)
275 && (simplejump_p (insn) || ANY_RETURN_P (PATTERN (insn))));
278 /* Return TRUE if this insn should stop the search for insn to fill delay
279 slots. LABELS_P indicates that labels should terminate the search.
280 In all cases, jumps terminate the search. */
282 static int
283 stop_search_p (rtx insn, int labels_p)
285 if (insn == 0)
286 return 1;
288 /* If the insn can throw an exception that is caught within the function,
289 it may effectively perform a jump from the viewpoint of the function.
290 Therefore act like for a jump. */
291 if (can_throw_internal (insn))
292 return 1;
294 switch (GET_CODE (insn))
296 case NOTE:
297 case CALL_INSN:
298 return 0;
300 case CODE_LABEL:
301 return labels_p;
303 case JUMP_INSN:
304 case BARRIER:
305 return 1;
307 case INSN:
308 /* OK unless it contains a delay slot or is an `asm' insn of some type.
309 We don't know anything about these. */
310 return (GET_CODE (PATTERN (insn)) == SEQUENCE
311 || GET_CODE (PATTERN (insn)) == ASM_INPUT
312 || asm_noperands (PATTERN (insn)) >= 0);
314 default:
315 gcc_unreachable ();
319 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
320 resource set contains a volatile memory reference. Otherwise, return FALSE. */
322 static int
323 resource_conflicts_p (struct resources *res1, struct resources *res2)
325 if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
326 || res1->volatil || res2->volatil)
327 return 1;
329 return hard_reg_set_intersect_p (res1->regs, res2->regs);
332 /* Return TRUE if any resource marked in RES, a `struct resources', is
 333 referenced by INSN. If INCLUDE_DELAYED_EFFECTS is set, resources used by
 334 the called routine are counted as well.
336 We compute this by computing all the resources referenced by INSN and
337 seeing if this conflicts with RES. It might be faster to directly check
338 ourselves, and this is the way it used to work, but it means duplicating
339 a large block of complex code. */
341 static int
342 insn_references_resource_p (rtx insn, struct resources *res,
343 bool include_delayed_effects)
345 struct resources insn_res;
347 CLEAR_RESOURCE (&insn_res);
348 mark_referenced_resources (insn, &insn_res, include_delayed_effects);
349 return resource_conflicts_p (&insn_res, res);
352 /* Return TRUE if INSN modifies resources that are marked in RES.
353 INCLUDE_DELAYED_EFFECTS is set if the actions of that routine should be
354 included. CC0 is only modified if it is explicitly set; see comments
355 in front of mark_set_resources for details. */
357 static int
358 insn_sets_resource_p (rtx insn, struct resources *res,
359 bool include_delayed_effects)
361 struct resources insn_sets;
363 CLEAR_RESOURCE (&insn_sets);
364 mark_set_resources (insn, &insn_sets, 0,
365 (include_delayed_effects
366 ? MARK_SRC_DEST_CALL
367 : MARK_SRC_DEST));
368 return resource_conflicts_p (&insn_sets, res);
371 /* Find a label at the end of the function or before a RETURN. If there
372 is none, try to make one. If that fails, returns 0.
374 The property of such a label is that it is placed just before the
375 epilogue or a bare RETURN insn, so that another bare RETURN can be
376 turned into a jump to the label unconditionally. In particular, the
377 label cannot be placed before a RETURN insn with a filled delay slot.
379 ??? There may be a problem with the current implementation. Suppose
380 we start with a bare RETURN insn and call find_end_label. It may set
381 function_return_label just before the RETURN. Suppose the machinery
382 is able to fill the delay slot of the RETURN insn afterwards. Then
383 function_return_label is no longer valid according to the property
384 described above and find_end_label will still return it unmodified.
385 Note that this is probably mitigated by the following observation:
386 once function_return_label is made, it is very likely the target of
387 a jump, so filling the delay slot of the RETURN will be much more
388 difficult.
389 KIND is either simple_return_rtx or ret_rtx, indicating which type of
390 return we're looking for. */
392 static rtx_code_label *
393 find_end_label (rtx kind)
395 rtx_insn *insn;
396 rtx_code_label **plabel;
398 if (kind == ret_rtx)
399 plabel = &function_return_label;
400 else
402 gcc_assert (kind == simple_return_rtx);
403 plabel = &function_simple_return_label;
406 /* If we found one previously, return it. */
407 if (*plabel)
408 return *plabel;
410 /* Otherwise, see if there is a label at the end of the function. If there
411 is, it must be that RETURN insns aren't needed, so that is our return
412 label and we don't have to do anything else. */
414 insn = get_last_insn ();
415 while (NOTE_P (insn)
416 || (NONJUMP_INSN_P (insn)
417 && (GET_CODE (PATTERN (insn)) == USE
418 || GET_CODE (PATTERN (insn)) == CLOBBER)))
419 insn = PREV_INSN (insn);
421 /* When a target threads its epilogue we might already have a
422 suitable return insn. If so put a label before it for the
423 function_return_label. */
424 if (BARRIER_P (insn)
425 && JUMP_P (PREV_INSN (insn))
426 && PATTERN (PREV_INSN (insn)) == kind)
428 rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
429 rtx_code_label *label = gen_label_rtx ();
430 LABEL_NUSES (label) = 0;
432 /* Put the label before any USE insns that may precede the RETURN
433 insn. */
434 while (GET_CODE (temp) == USE)
435 temp = PREV_INSN (temp);
437 emit_label_after (label, temp);
438 *plabel = label;
441 else if (LABEL_P (insn))
442 *plabel = as_a <rtx_code_label *> (insn);
443 else
445 rtx_code_label *label = gen_label_rtx ();
446 LABEL_NUSES (label) = 0;
447 /* If the basic block reorder pass moves the return insn to
448 some other place try to locate it again and put our
449 function_return_label there. */
450 while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
451 insn = PREV_INSN (insn);
452 if (insn)
454 insn = PREV_INSN (insn);
456 /* Put the label before any USE insns that may precede the
457 RETURN insn. */
458 while (GET_CODE (insn) == USE)
459 insn = PREV_INSN (insn);
461 emit_label_after (label, insn);
463 else
465 #ifdef HAVE_epilogue
466 if (HAVE_epilogue
467 #ifdef HAVE_return
468 && ! HAVE_return
469 #endif
471 /* The RETURN insn has its delay slot filled so we cannot
472 emit the label just before it. Since we already have
473 an epilogue and cannot emit a new RETURN, we cannot
474 emit the label at all. */
475 return NULL;
476 #endif /* HAVE_epilogue */
478 /* Otherwise, make a new label and emit a RETURN and BARRIER,
479 if needed. */
480 emit_label (label);
481 #ifdef HAVE_return
482 if (HAVE_return)
484 /* The return we make may have delay slots too. */
485 rtx insn = gen_return ();
486 insn = emit_jump_insn (insn);
487 set_return_jump_label (insn);
488 emit_barrier ();
489 if (num_delay_slots (insn) > 0)
490 obstack_ptr_grow (&unfilled_slots_obstack, insn);
492 #endif
494 *plabel = label;
497 /* Show one additional use for this label so it won't go away until
498 we are done. */
499 ++LABEL_NUSES (*plabel);
501 return *plabel;
504 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
505 the pattern of INSN with the SEQUENCE.
507 Returns the insn containing the SEQUENCE that replaces INSN. */
509 static rtx_insn *
510 emit_delay_sequence (rtx_insn *insn, rtx_insn_list *list, int length)
512 /* Allocate the rtvec to hold the insns and the SEQUENCE. */
513 rtvec seqv = rtvec_alloc (length + 1);
514 rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
515 rtx_insn *seq_insn = make_insn_raw (seq);
517 /* If DELAY_INSN has a location, use it for SEQ_INSN. If DELAY_INSN does
518 not have a location, but one of the delayed insns does, we pick up a
519 location from there later. */
520 INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
522 /* Unlink INSN from the insn chain, so that we can put it into
523 the SEQUENCE. Remember where we want to emit SEQUENCE in AFTER. */
524 rtx after = PREV_INSN (insn);
525 remove_insn (insn);
526 SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
528 /* Build our SEQUENCE and rebuild the insn chain. */
529 int i = 1;
530 start_sequence ();
531 XVECEXP (seq, 0, 0) = emit_insn (insn);
532 for (rtx_insn_list *li = list; li; li = li->next (), i++)
534 rtx_insn *tem = li->insn ();
535 rtx note, next;
537 /* Show that this copy of the insn isn't deleted. */
538 INSN_DELETED_P (tem) = 0;
540 /* Unlink insn from its original place, and re-emit it into
541 the sequence. */
542 SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
543 XVECEXP (seq, 0, i) = emit_insn (tem);
 545 /* The SPARC assembler, for instance, emits a warning when debug info is
 546 output into the delay slot. */
547 if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
548 INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
549 INSN_LOCATION (tem) = 0;
551 for (note = REG_NOTES (tem); note; note = next)
553 next = XEXP (note, 1);
554 switch (REG_NOTE_KIND (note))
556 case REG_DEAD:
557 /* Remove any REG_DEAD notes because we can't rely on them now
558 that the insn has been moved. */
559 remove_note (tem, note);
560 break;
562 case REG_LABEL_OPERAND:
563 case REG_LABEL_TARGET:
564 /* Keep the label reference count up to date. */
565 if (LABEL_P (XEXP (note, 0)))
566 LABEL_NUSES (XEXP (note, 0)) ++;
567 break;
569 default:
570 break;
574 end_sequence ();
575 gcc_assert (i == length + 1);
577 /* Splice our SEQUENCE into the insn stream where INSN used to be. */
578 add_insn_after (seq_insn, after, NULL);
580 return seq_insn;
583 /* Add INSN to DELAY_LIST and return the head of the new list. The list must
584 be in the order in which the insns are to be executed. */
586 static rtx_insn_list *
587 add_to_delay_list (rtx_insn *insn, rtx_insn_list *delay_list)
589 /* If we have an empty list, just make a new list element. If
590 INSN has its block number recorded, clear it since we may
591 be moving the insn to a new block. */
593 if (delay_list == 0)
595 clear_hashed_info_for_insn (insn);
596 return gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
599 /* Otherwise this must be an INSN_LIST. Add INSN to the end of the
600 list. */
601 XEXP (delay_list, 1) = add_to_delay_list (insn, delay_list->next ());
603 return delay_list;
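/* A usage sketch (hypothetical insn variables): callers build the list
   in execution order, e.g.

       rtx_insn_list *delay_list = 0;
       delay_list = add_to_delay_list (first_slot_insn, delay_list);
       delay_list = add_to_delay_list (second_slot_insn, delay_list);

   so FIRST_SLOT_INSN remains at the head and SECOND_SLOT_INSN is
   appended at the tail, matching the order in which the delay slots
   will execute.  */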
606 /* Delete INSN from the delay slot of the insn that it is in, which may
607 produce an insn with no delay slots. Return the new insn. */
609 static rtx_insn *
610 delete_from_delay_slot (rtx_insn *insn)
612 rtx_insn *trial, *seq_insn, *prev;
613 rtx_sequence *seq;
614 rtx_insn_list *delay_list = 0;
615 int i;
616 int had_barrier = 0;
618 /* We first must find the insn containing the SEQUENCE with INSN in its
619 delay slot. Do this by finding an insn, TRIAL, where
620 PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL. */
622 for (trial = insn;
623 PREV_INSN (NEXT_INSN (trial)) == trial;
624 trial = NEXT_INSN (trial))
627 seq_insn = PREV_INSN (NEXT_INSN (trial));
628 seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
630 if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
631 had_barrier = 1;
633 /* Create a delay list consisting of all the insns other than the one
634 we are deleting (unless we were the only one). */
635 if (seq->len () > 2)
636 for (i = 1; i < seq->len (); i++)
637 if (seq->insn (i) != insn)
638 delay_list = add_to_delay_list (seq->insn (i), delay_list);
640 /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
641 list, and rebuild the delay list if non-empty. */
642 prev = PREV_INSN (seq_insn);
643 trial = seq->insn (0);
644 delete_related_insns (seq_insn);
645 add_insn_after (trial, prev, NULL);
 647 /* If there was a barrier after the old SEQUENCE, re-emit it. */
648 if (had_barrier)
649 emit_barrier_after (trial);
 651 /* If there are any delay insns, re-emit them. Otherwise clear the
652 annul flag. */
653 if (delay_list)
654 trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
655 else if (JUMP_P (trial))
656 INSN_ANNULLED_BRANCH_P (trial) = 0;
658 INSN_FROM_TARGET_P (insn) = 0;
660 /* Show we need to fill this insn again. */
661 obstack_ptr_grow (&unfilled_slots_obstack, trial);
663 return trial;
666 /* Delete INSN, a JUMP_INSN. If it is a conditional jump, we must track down
667 the insn that sets CC0 for it and delete it too. */
669 static void
670 delete_scheduled_jump (rtx_insn *insn)
672 /* Delete the insn that sets cc0 for us. On machines without cc0, we could
673 delete the insn that sets the condition code, but it is hard to find it.
674 Since this case is rare anyway, don't bother trying; there would likely
675 be other insns that became dead anyway, which we wouldn't know to
676 delete. */
678 #ifdef HAVE_cc0
679 if (reg_mentioned_p (cc0_rtx, insn))
681 rtx note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
683 /* If a reg-note was found, it points to an insn to set CC0. This
684 insn is in the delay list of some other insn. So delete it from
685 the delay list it was in. */
686 if (note)
688 if (! FIND_REG_INC_NOTE (XEXP (note, 0), NULL_RTX)
689 && sets_cc0_p (PATTERN (XEXP (note, 0))) == 1)
690 delete_from_delay_slot (as_a <rtx_insn *> (XEXP (note, 0)));
692 else
694 /* The insn setting CC0 is our previous insn, but it may be in
695 a delay slot. It will be the last insn in the delay slot, if
696 it is. */
697 rtx_insn *trial = previous_insn (insn);
698 if (NOTE_P (trial))
699 trial = prev_nonnote_insn (trial);
700 if (sets_cc0_p (PATTERN (trial)) != 1
701 || FIND_REG_INC_NOTE (trial, NULL_RTX))
702 return;
703 if (PREV_INSN (NEXT_INSN (trial)) == trial)
704 delete_related_insns (trial);
705 else
706 delete_from_delay_slot (trial);
709 #endif
711 delete_related_insns (insn);
714 /* Counters for delay-slot filling. */
716 #define NUM_REORG_FUNCTIONS 2
717 #define MAX_DELAY_HISTOGRAM 3
718 #define MAX_REORG_PASSES 2
720 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
722 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
724 static int reorg_pass_number;
726 static void
727 note_delay_statistics (int slots_filled, int index)
729 num_insns_needing_delays[index][reorg_pass_number]++;
730 if (slots_filled > MAX_DELAY_HISTOGRAM)
731 slots_filled = MAX_DELAY_HISTOGRAM;
732 num_filled_delays[index][slots_filled][reorg_pass_number]++;
735 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
737 /* Optimize the following cases:
739 1. When a conditional branch skips over only one instruction,
740 use an annulling branch and put that insn in the delay slot.
 741 Use either a branch that annuls when the condition is true, or
742 invert the test with a branch that annuls when the condition is
743 false. This saves insns, since otherwise we must copy an insn
744 from the L1 target.
746 (orig) (skip) (otherwise)
747 Bcc.n L1 Bcc',a L1 Bcc,a L1'
748 insn insn insn2
749 L1: L1: L1:
750 insn2 insn2 insn2
751 insn3 insn3 L1':
752 insn3
754 2. When a conditional branch skips over only one instruction,
755 and after that, it unconditionally branches somewhere else,
756 perform the similar optimization. This saves executing the
757 second branch in the case where the inverted condition is true.
759 Bcc.n L1 Bcc',a L2
760 insn insn
761 L1: L1:
762 Bra L2 Bra L2
764 INSN is a JUMP_INSN.
766 This should be expanded to skip over N insns, where N is the number
767 of delay slots required. */
769 static rtx_insn_list *
770 optimize_skip (rtx_insn *insn)
772 rtx_insn *trial = next_nonnote_insn (insn);
773 rtx_insn *next_trial = next_active_insn (trial);
774 rtx_insn_list *delay_list = 0;
775 int flags;
777 flags = get_jump_flags (insn, JUMP_LABEL (insn));
779 if (trial == 0
780 || !NONJUMP_INSN_P (trial)
781 || GET_CODE (PATTERN (trial)) == SEQUENCE
782 || recog_memoized (trial) < 0
783 || (! eligible_for_annul_false (insn, 0, trial, flags)
784 && ! eligible_for_annul_true (insn, 0, trial, flags))
785 || can_throw_internal (trial))
786 return 0;
788 /* There are two cases where we are just executing one insn (we assume
789 here that a branch requires only one insn; this should be generalized
790 at some point): Where the branch goes around a single insn or where
791 we have one insn followed by a branch to the same label we branch to.
792 In both of these cases, inverting the jump and annulling the delay
793 slot give the same effect in fewer insns. */
794 if (next_trial == next_active_insn (JUMP_LABEL (insn))
795 || (next_trial != 0
796 && simplejump_or_return_p (next_trial)
797 && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
799 if (eligible_for_annul_false (insn, 0, trial, flags))
801 if (invert_jump (insn, JUMP_LABEL (insn), 1))
802 INSN_FROM_TARGET_P (trial) = 1;
803 else if (! eligible_for_annul_true (insn, 0, trial, flags))
804 return 0;
807 delay_list = add_to_delay_list (trial, NULL);
808 next_trial = next_active_insn (trial);
809 update_block (trial, trial);
810 delete_related_insns (trial);
812 /* Also, if we are targeting an unconditional
813 branch, thread our jump to the target of that branch. Don't
814 change this into a RETURN here, because it may not accept what
815 we have in the delay slot. We'll fix this up later. */
816 if (next_trial && simplejump_or_return_p (next_trial))
818 rtx target_label = JUMP_LABEL (next_trial);
819 if (ANY_RETURN_P (target_label))
820 target_label = find_end_label (target_label);
822 if (target_label)
824 /* Recompute the flags based on TARGET_LABEL since threading
825 the jump to TARGET_LABEL may change the direction of the
826 jump (which may change the circumstances in which the
827 delay slot is nullified). */
828 flags = get_jump_flags (insn, target_label);
829 if (eligible_for_annul_true (insn, 0, trial, flags))
830 reorg_redirect_jump (insn, target_label);
834 INSN_ANNULLED_BRANCH_P (insn) = 1;
837 return delay_list;
839 #endif
841 /* Encode and return branch direction and prediction information for
842 INSN assuming it will jump to LABEL.
 844 Unconditional branches return no direction information and
845 are predicted as very likely taken. */
847 static int
848 get_jump_flags (rtx insn, rtx label)
850 int flags;
852 /* get_jump_flags can be passed any insn with delay slots, these may
853 be INSNs, CALL_INSNs, or JUMP_INSNs. Only JUMP_INSNs have branch
854 direction information, and only if they are conditional jumps.
856 If LABEL is a return, then there is no way to determine the branch
857 direction. */
858 if (JUMP_P (insn)
859 && (condjump_p (insn) || condjump_in_parallel_p (insn))
860 && !ANY_RETURN_P (label)
861 && INSN_UID (insn) <= max_uid
862 && INSN_UID (label) <= max_uid)
863 flags
864 = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
865 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
866 /* No valid direction information. */
867 else
868 flags = 0;
870 return flags;
873 /* Return truth value of the statement that this branch
874 is mostly taken. If we think that the branch is extremely likely
875 to be taken, we return 2. If the branch is slightly more likely to be
876 taken, return 1. If the branch is slightly less likely to be taken,
 877 return 0, and if the branch is highly unlikely to be taken, return -1. */
879 static int
880 mostly_true_jump (rtx jump_insn)
882 /* If branch probabilities are available, then use that number since it
883 always gives a correct answer. */
884 rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
885 if (note)
887 int prob = XINT (note, 0);
889 if (prob >= REG_BR_PROB_BASE * 9 / 10)
890 return 2;
891 else if (prob >= REG_BR_PROB_BASE / 2)
892 return 1;
893 else if (prob >= REG_BR_PROB_BASE / 10)
894 return 0;
895 else
896 return -1;
899 /* If there is no note, assume branches are not taken.
900 This should be rare. */
901 return 0;
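/* A worked example, assuming the usual REG_BR_PROB_BASE of 10000: a
   REG_BR_PROB note of 9500 yields 2 (>= 9000), 6000 yields 1 (>= 5000),
   2000 yields 0 (>= 1000), and 500 yields -1.  */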
904 /* Return the condition under which INSN will branch to TARGET. If TARGET
905 is zero, return the condition under which INSN will return. If INSN is
906 an unconditional branch, return const_true_rtx. If INSN isn't a simple
907 type of jump, or it doesn't go to TARGET, return 0. */
909 static rtx
910 get_branch_condition (rtx insn, rtx target)
912 rtx pat = PATTERN (insn);
913 rtx src;
915 if (condjump_in_parallel_p (insn))
916 pat = XVECEXP (pat, 0, 0);
918 if (ANY_RETURN_P (pat) && pat == target)
919 return const_true_rtx;
921 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
922 return 0;
924 src = SET_SRC (pat);
925 if (GET_CODE (src) == LABEL_REF && XEXP (src, 0) == target)
926 return const_true_rtx;
928 else if (GET_CODE (src) == IF_THEN_ELSE
929 && XEXP (src, 2) == pc_rtx
930 && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
931 && XEXP (XEXP (src, 1), 0) == target)
932 || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
933 return XEXP (src, 0);
935 else if (GET_CODE (src) == IF_THEN_ELSE
936 && XEXP (src, 1) == pc_rtx
937 && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
938 && XEXP (XEXP (src, 2), 0) == target)
939 || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
941 enum rtx_code rev;
942 rev = reversed_comparison_code (XEXP (src, 0), insn);
943 if (rev != UNKNOWN)
944 return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
945 XEXP (XEXP (src, 0), 0),
946 XEXP (XEXP (src, 0), 1));
949 return 0;
952 /* Return nonzero if CONDITION is more strict than the condition of
953 INSN, i.e., if INSN will always branch if CONDITION is true. */
955 static int
956 condition_dominates_p (rtx condition, rtx insn)
958 rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
959 enum rtx_code code = GET_CODE (condition);
960 enum rtx_code other_code;
962 if (rtx_equal_p (condition, other_condition)
963 || other_condition == const_true_rtx)
964 return 1;
966 else if (condition == const_true_rtx || other_condition == 0)
967 return 0;
969 other_code = GET_CODE (other_condition);
970 if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
971 || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
972 || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
973 return 0;
975 return comparison_dominates_p (code, other_code);
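/* For illustration: if CONDITION is (eq x y) and INSN branches on
   (le x y) with the same operands, comparison_dominates_p (EQ, LE)
   holds, so INSN would certainly branch whenever CONDITION is true and
   the function returns nonzero.  */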
978 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
979 any insns already in the delay slot of JUMP. */
981 static int
982 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
984 int flags, i;
985 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
987 /* Make sure all the delay slots of this jump would still
988 be valid after threading the jump. If they are still
989 valid, then return nonzero. */
991 flags = get_jump_flags (jump, newlabel);
992 for (i = 1; i < pat->len (); i++)
993 if (! (
994 #ifdef ANNUL_IFFALSE_SLOTS
995 (INSN_ANNULLED_BRANCH_P (jump)
996 && INSN_FROM_TARGET_P (pat->insn (i)))
997 ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
998 #endif
999 #ifdef ANNUL_IFTRUE_SLOTS
1000 (INSN_ANNULLED_BRANCH_P (jump)
1001 && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
1002 ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
1003 #endif
1004 eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
1005 break;
1007 return (i == pat->len ());
1010 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
1011 any insns we wish to place in the delay slot of JUMP. */
1013 static int
1014 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
1015 rtx_insn_list *delay_list)
1017 int flags, i;
1018 rtx_insn_list *li;
1020 /* Make sure all the insns in DELAY_LIST would still be
1021 valid after threading the jump. If they are still
1022 valid, then return nonzero. */
1024 flags = get_jump_flags (jump, newlabel);
1025 for (li = delay_list, i = 0; li; li = li->next (), i++)
1026 if (! (
1027 #ifdef ANNUL_IFFALSE_SLOTS
1028 (INSN_ANNULLED_BRANCH_P (jump)
1029 && INSN_FROM_TARGET_P (li->insn ()))
1030 ? eligible_for_annul_false (jump, i, li->insn (), flags) :
1031 #endif
1032 #ifdef ANNUL_IFTRUE_SLOTS
1033 (INSN_ANNULLED_BRANCH_P (jump)
1034 && ! INSN_FROM_TARGET_P (XEXP (li, 0)))
1035 ? eligible_for_annul_true (jump, i, li->insn (), flags) :
1036 #endif
1037 eligible_for_delay (jump, i, li->insn (), flags)))
1038 break;
1040 return (li == NULL);
1043 /* DELAY_LIST is a list of insns that have already been placed into delay
1044 slots. See if all of them have the same annulling status as ANNUL_TRUE_P.
1045 If not, return 0; otherwise return 1. */
1047 static int
1048 check_annul_list_true_false (int annul_true_p, rtx delay_list)
1050 rtx temp;
1052 if (delay_list)
1054 for (temp = delay_list; temp; temp = XEXP (temp, 1))
1056 rtx trial = XEXP (temp, 0);
1058 if ((annul_true_p && INSN_FROM_TARGET_P (trial))
1059 || (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
1060 return 0;
1064 return 1;
1067 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE. Given that
1068 the condition tested by INSN is CONDITION and the resources shown in
1069 OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
1070 from SEQ's delay list, in addition to whatever insns it may execute
 1071 (in DELAY_LIST). SETS and NEEDED denote resources already set and
 1072 needed while searching for delay slot insns. Return the concatenated
 1073 delay list if possible; otherwise return 0.
1075 SLOTS_TO_FILL is the total number of slots required by INSN, and
1076 PSLOTS_FILLED points to the number filled so far (also the number of
1077 insns in DELAY_LIST). It is updated with the number that have been
1078 filled from the SEQUENCE, if any.
1080 PANNUL_P points to a nonzero value if we already know that we need
1081 to annul INSN. If this routine determines that annulling is needed,
1082 it may set that value nonzero.
1084 PNEW_THREAD points to a location that is to receive the place at which
1085 execution should continue. */
1087 static rtx_insn_list *
1088 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
1089 rtx_insn_list *delay_list, struct resources *sets,
1090 struct resources *needed,
1091 struct resources *other_needed,
1092 int slots_to_fill, int *pslots_filled,
1093 int *pannul_p, rtx *pnew_thread)
1095 int slots_remaining = slots_to_fill - *pslots_filled;
1096 int total_slots_filled = *pslots_filled;
1097 rtx_insn_list *new_delay_list = 0;
1098 int must_annul = *pannul_p;
1099 int used_annul = 0;
1100 int i;
1101 struct resources cc_set;
1102 bool *redundant;
1104 /* We can't do anything if there are more delay slots in SEQ than we
1105 can handle, or if we don't know that it will be a taken branch.
1106 We know that it will be a taken branch if it is either an unconditional
1107 branch or a conditional branch with a stricter branch condition.
1109 Also, exit if the branch has more than one set, since then it is computing
1110 other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1111 ??? It may be possible to move other sets into INSN in addition to
1112 moving the instructions in the delay slots.
 1114 We cannot steal the delay list if one of the instructions in the
 1115 current delay_list modifies the condition codes and the jump in the
 1116 sequence is a conditional jump. We cannot do this because we would not
 1117 be able to change the direction of the jump, since the condition codes
 1118 would affect the direction of the jump in the sequence. */
1120 CLEAR_RESOURCE (&cc_set);
1121 for (rtx_insn_list *temp = delay_list; temp; temp = temp->next ())
1123 rtx_insn *trial = temp->insn ();
1125 mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1126 if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1127 return delay_list;
1130 if (XVECLEN (seq, 0) - 1 > slots_remaining
1131 || ! condition_dominates_p (condition, seq->insn (0))
1132 || ! single_set (seq->insn (0)))
1133 return delay_list;
1135 #ifdef MD_CAN_REDIRECT_BRANCH
1136 /* On some targets, branches with delay slots can have a limited
1137 displacement. Give the back end a chance to tell us we can't do
1138 this. */
1139 if (! MD_CAN_REDIRECT_BRANCH (insn, seq->insn (0)))
1140 return delay_list;
1141 #endif
1143 redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
1144 for (i = 1; i < seq->len (); i++)
1146 rtx_insn *trial = seq->insn (i);
1147 int flags;
1149 if (insn_references_resource_p (trial, sets, false)
1150 || insn_sets_resource_p (trial, needed, false)
1151 || insn_sets_resource_p (trial, sets, false)
1152 #ifdef HAVE_cc0
1153 /* If TRIAL sets CC0, we can't copy it, so we can't steal this
1154 delay list. */
1155 || find_reg_note (trial, REG_CC_USER, NULL_RTX)
1156 #endif
1157 /* If TRIAL is from the fallthrough code of an annulled branch insn
1158 in SEQ, we cannot use it. */
1159 || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1160 && ! INSN_FROM_TARGET_P (trial)))
1161 return delay_list;
1163 /* If this insn was already done (usually in a previous delay slot),
1164 pretend we put it in our delay slot. */
1165 redundant[i] = redundant_insn (trial, insn, new_delay_list);
1166 if (redundant[i])
1167 continue;
1169 /* We will end up re-vectoring this branch, so compute flags
1170 based on jumping to the new label. */
1171 flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1173 if (! must_annul
1174 && ((condition == const_true_rtx
1175 || (! insn_sets_resource_p (trial, other_needed, false)
1176 && ! may_trap_or_fault_p (PATTERN (trial)))))
1177 ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1178 : (must_annul || (delay_list == NULL && new_delay_list == NULL))
1179 && (must_annul = 1,
1180 check_annul_list_true_false (0, delay_list)
1181 && check_annul_list_true_false (0, new_delay_list)
1182 && eligible_for_annul_false (insn, total_slots_filled,
1183 trial, flags)))
1185 if (must_annul)
1186 used_annul = 1;
1187 rtx_insn *temp = copy_delay_slot_insn (trial);
1188 INSN_FROM_TARGET_P (temp) = 1;
1189 new_delay_list = add_to_delay_list (temp, new_delay_list);
1190 total_slots_filled++;
1192 if (--slots_remaining == 0)
1193 break;
1195 else
1196 return delay_list;
1199 /* Record the effect of the instructions that were redundant and which
1200 we therefore decided not to copy. */
1201 for (i = 1; i < seq->len (); i++)
1202 if (redundant[i])
1203 update_block (seq->insn (i), insn);
1205 /* Show the place to which we will be branching. */
1206 *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1208 /* Add any new insns to the delay list and update the count of the
1209 number of slots filled. */
1210 *pslots_filled = total_slots_filled;
1211 if (used_annul)
1212 *pannul_p = 1;
1214 if (delay_list == 0)
1215 return new_delay_list;
1217 for (rtx_insn_list *temp = new_delay_list; temp; temp = temp->next ())
1218 delay_list = add_to_delay_list (temp->insn (), delay_list);
1220 return delay_list;
1223 /* Similar to steal_delay_list_from_target except that SEQ is on the
1224 fallthrough path of INSN. Here we only do something if the delay insn
1225 of SEQ is an unconditional branch. In that case we steal its delay slot
1226 for INSN since unconditional branches are much easier to fill. */
1228 static rtx_insn_list *
1229 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1230 rtx_sequence *seq,
1231 rtx_insn_list *delay_list,
1232 struct resources *sets,
1233 struct resources *needed,
1234 struct resources *other_needed,
1235 int slots_to_fill, int *pslots_filled,
1236 int *pannul_p)
1238 int i;
1239 int flags;
1240 int must_annul = *pannul_p;
1241 int used_annul = 0;
1243 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1245 /* We can't do anything if SEQ's delay insn isn't an
1246 unconditional branch. */
1248 if (! simplejump_or_return_p (seq->insn (0)))
1249 return delay_list;
1251 for (i = 1; i < seq->len (); i++)
1253 rtx_insn *trial = seq->insn (i);
1255 /* If TRIAL sets CC0, stealing it will move it too far from the use
1256 of CC0. */
1257 if (insn_references_resource_p (trial, sets, false)
1258 || insn_sets_resource_p (trial, needed, false)
1259 || insn_sets_resource_p (trial, sets, false)
1260 #ifdef HAVE_cc0
1261 || sets_cc0_p (PATTERN (trial))
1262 #endif
1265 break;
1267 /* If this insn was already done, we don't need it. */
1268 if (redundant_insn (trial, insn, delay_list))
1270 update_block (trial, insn);
1271 delete_from_delay_slot (trial);
1272 continue;
1275 if (! must_annul
1276 && ((condition == const_true_rtx
1277 || (! insn_sets_resource_p (trial, other_needed, false)
1278 && ! may_trap_or_fault_p (PATTERN (trial)))))
1279 ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1280 : (must_annul || delay_list == NULL) && (must_annul = 1,
1281 check_annul_list_true_false (1, delay_list)
1282 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1284 if (must_annul)
1285 used_annul = 1;
1286 delete_from_delay_slot (trial);
1287 delay_list = add_to_delay_list (trial, delay_list);
1289 if (++(*pslots_filled) == slots_to_fill)
1290 break;
1292 else
1293 break;
1296 if (used_annul)
1297 *pannul_p = 1;
1298 return delay_list;
1301 /* Try merging insns starting at THREAD which match exactly the insns in
1302 INSN's delay list.
1304 If all insns were matched and the insn was previously annulling, the
1305 annul bit will be cleared.
1307 For each insn that is merged, if the branch is or will be non-annulling,
1308 we delete the merged insn. */
1310 static void
1311 try_merge_delay_insns (rtx insn, rtx_insn *thread)
1313 rtx_insn *trial, *next_trial;
1314 rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1315 int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1316 int slot_number = 1;
1317 int num_slots = XVECLEN (PATTERN (insn), 0);
1318 rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1319 struct resources set, needed;
1320 rtx_insn_list *merged_insns = 0;
1321 int i;
1322 int flags;
1324 flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1326 CLEAR_RESOURCE (&needed);
1327 CLEAR_RESOURCE (&set);
1329 /* If this is not an annulling branch, take into account anything needed in
1330 INSN's delay slot. This prevents two increments from being incorrectly
1331 folded into one. If we are annulling, this would be the correct
1332 thing to do. (The alternative, looking at things set in NEXT_TO_MATCH
1333 will essentially disable this optimization. This method is somewhat of
1334 a kludge, but I don't see a better way.) */
1335 if (! annul_p)
1336 for (i = 1 ; i < num_slots; i++)
1337 if (XVECEXP (PATTERN (insn), 0, i))
1338 mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1339 true);
1341 for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1343 rtx pat = PATTERN (trial);
1344 rtx oldtrial = trial;
1346 next_trial = next_nonnote_insn (trial);
1348 /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */
1349 if (NONJUMP_INSN_P (trial)
1350 && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1351 continue;
1353 if (GET_CODE (next_to_match) == GET_CODE (trial)
1354 #ifdef HAVE_cc0
1355 /* We can't share an insn that sets cc0. */
1356 && ! sets_cc0_p (pat)
1357 #endif
1358 && ! insn_references_resource_p (trial, &set, true)
1359 && ! insn_sets_resource_p (trial, &set, true)
1360 && ! insn_sets_resource_p (trial, &needed, true)
1361 && (trial = try_split (pat, trial, 0)) != 0
1362 /* Update next_trial, in case try_split succeeded. */
1363 && (next_trial = next_nonnote_insn (trial))
1364 /* Likewise THREAD. */
1365 && (thread = oldtrial == thread ? trial : thread)
1366 && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1367 /* Have to test this condition if annul condition is different
1368 from (and less restrictive than) non-annulling one. */
1369 && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1372 if (! annul_p)
1374 update_block (trial, thread);
1375 if (trial == thread)
1376 thread = next_active_insn (thread);
1378 delete_related_insns (trial);
1379 INSN_FROM_TARGET_P (next_to_match) = 0;
1381 else
1382 merged_insns = gen_rtx_INSN_LIST (VOIDmode, trial, merged_insns);
1384 if (++slot_number == num_slots)
1385 break;
1387 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1390 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1391 mark_referenced_resources (trial, &needed, true);
1394 /* See if we stopped on a filled insn. If we did, try to see if its
1395 delay slots match. */
1396 if (slot_number != num_slots
1397 && trial && NONJUMP_INSN_P (trial)
1398 && GET_CODE (PATTERN (trial)) == SEQUENCE
1399 && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1400 && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1402 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1403 rtx filled_insn = XVECEXP (pat, 0, 0);
1405 /* Account for resources set/needed by the filled insn. */
1406 mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1407 mark_referenced_resources (filled_insn, &needed, true);
1409 for (i = 1; i < pat->len (); i++)
1411 rtx_insn *dtrial = pat->insn (i);
1413 if (! insn_references_resource_p (dtrial, &set, true)
1414 && ! insn_sets_resource_p (dtrial, &set, true)
1415 && ! insn_sets_resource_p (dtrial, &needed, true)
1416 #ifdef HAVE_cc0
1417 && ! sets_cc0_p (PATTERN (dtrial))
1418 #endif
1419 && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1420 && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1422 if (! annul_p)
1424 rtx_insn *new_rtx;
1426 update_block (dtrial, thread);
1427 new_rtx = delete_from_delay_slot (dtrial);
1428 if (INSN_DELETED_P (thread))
1429 thread = new_rtx;
1430 INSN_FROM_TARGET_P (next_to_match) = 0;
1432 else
1433 merged_insns = gen_rtx_INSN_LIST (SImode, dtrial,
1434 merged_insns);
1436 if (++slot_number == num_slots)
1437 break;
1439 next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1441 else
1443 /* Keep track of the set/referenced resources for the delay
1444 slots of any trial insns we encounter. */
1445 mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1446 mark_referenced_resources (dtrial, &needed, true);
1451 /* If all insns in the delay slot have been matched and we were previously
 1452 annulling the branch, we need not do so any more. In that case delete all the
1453 merged insns. Also clear the INSN_FROM_TARGET_P bit of each insn in
1454 the delay list so that we know that it isn't only being used at the
1455 target. */
1456 if (slot_number == num_slots && annul_p)
1458 for (; merged_insns; merged_insns = merged_insns->next ())
1460 if (GET_MODE (merged_insns) == SImode)
1462 rtx_insn *new_rtx;
1464 update_block (merged_insns->insn (), thread);
1465 new_rtx = delete_from_delay_slot (merged_insns->insn ());
1466 if (INSN_DELETED_P (thread))
1467 thread = new_rtx;
1469 else
1471 update_block (merged_insns->insn (), thread);
1472 delete_related_insns (merged_insns->insn ());
1476 INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1478 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1479 INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1483 /* See if INSN is redundant with an insn in front of TARGET. Often this
1484 is called when INSN is a candidate for a delay slot of TARGET.
1485 DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1486 of INSN. Often INSN will be redundant with an insn in a delay slot of
1487 some previous insn. This happens when we have a series of branches to the
1488 same label; in that case the first insn at the target might want to go
1489 into each of the delay slots.
1491 If we are not careful, this routine can take up a significant fraction
1492 of the total compilation time (4%), but only wins rarely. Hence we
1493 speed this routine up by making two passes. The first pass goes back
1494 until it hits a label and sees if it finds an insn with an identical
1495 pattern. Only in this (relatively rare) event does it check for
1496 data conflicts.
1498 We do not split insns we encounter. This could cause us not to find a
1499 redundant insn, but the cost of splitting seems greater than the possible
1500 gain in rare cases. */
1502 static rtx
1503 redundant_insn (rtx insn, rtx_insn *target, rtx delay_list)
1505 rtx target_main = target;
1506 rtx ipat = PATTERN (insn);
1507 rtx_insn *trial;
1508 rtx pat;
1509 struct resources needed, set;
1510 int i;
1511 unsigned insns_to_search;
1513 /* If INSN has any REG_UNUSED notes, it can't match anything since we
1514 are allowed to not actually assign to such a register. */
1515 if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1516 return 0;
1518 /* Scan backwards looking for a match. */
1519 for (trial = PREV_INSN (target),
1520 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1521 trial && insns_to_search > 0;
1522 trial = PREV_INSN (trial))
1524 /* (use (insn))s can come immediately after a barrier if the
1525 label that used to precede them has been deleted as dead.
1526 See delete_related_insns. */
1527 if (LABEL_P (trial) || BARRIER_P (trial))
1528 return 0;
1530 if (!INSN_P (trial))
1531 continue;
1532 --insns_to_search;
1534 pat = PATTERN (trial);
1535 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1536 continue;
1538 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1540 /* Stop for a CALL and its delay slots because it is difficult to
1541 track its resource needs correctly. */
1542 if (CALL_P (seq->element (0)))
1543 return 0;
1545 /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1546 slots because it is difficult to track its resource needs
1547 correctly. */
1549 #ifdef INSN_SETS_ARE_DELAYED
1550 if (INSN_SETS_ARE_DELAYED (seq->element (0)))
1551 return 0;
1552 #endif
1554 #ifdef INSN_REFERENCES_ARE_DELAYED
1555 if (INSN_REFERENCES_ARE_DELAYED (seq->element (0)))
1556 return 0;
1557 #endif
1559 /* See if any of the insns in the delay slot match, updating
1560 resource requirements as we go. */
1561 for (i = seq->len () - 1; i > 0; i--)
1562 if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1563 && rtx_equal_p (PATTERN (seq->element (i)), ipat)
1564 && ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1565 break;
1567 /* If found a match, exit this loop early. */
1568 if (i > 0)
1569 break;
1572 else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1573 && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1574 break;
1577 /* If we didn't find an insn that matches, return 0. */
1578 if (trial == 0)
1579 return 0;
1581 /* See what resources this insn sets and needs. If they overlap, or
1582 if this insn references CC0, it can't be redundant. */
1584 CLEAR_RESOURCE (&needed);
1585 CLEAR_RESOURCE (&set);
1586 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1587 mark_referenced_resources (insn, &needed, true);
1589 /* If TARGET is a SEQUENCE, get the main insn. */
1590 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1591 target_main = XVECEXP (PATTERN (target), 0, 0);
1593 if (resource_conflicts_p (&needed, &set)
1594 #ifdef HAVE_cc0
1595 || reg_mentioned_p (cc0_rtx, ipat)
1596 #endif
1597 /* The insn requiring the delay may not set anything needed or set by
1598 INSN. */
1599 || insn_sets_resource_p (target_main, &needed, true)
1600 || insn_sets_resource_p (target_main, &set, true))
1601 return 0;
1603 /* Insns we pass may not set either NEEDED or SET, so merge them for
1604 simpler tests. */
1605 needed.memory |= set.memory;
1606 IOR_HARD_REG_SET (needed.regs, set.regs);
1608 /* This insn isn't redundant if it conflicts with an insn that either is
1609 or will be in a delay slot of TARGET. */
1611 while (delay_list)
1613 if (insn_sets_resource_p (XEXP (delay_list, 0), &needed, true))
1614 return 0;
1615 delay_list = XEXP (delay_list, 1);
1618 if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1619 for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1620 if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1621 true))
1622 return 0;
1624 /* Scan backwards until we reach a label or an insn that uses something
 1625 INSN sets or sets something INSN uses or sets. */
1627 for (trial = PREV_INSN (target),
1628 insns_to_search = MAX_DELAY_SLOT_INSN_SEARCH;
1629 trial && !LABEL_P (trial) && insns_to_search > 0;
1630 trial = PREV_INSN (trial))
1632 if (!INSN_P (trial))
1633 continue;
1634 --insns_to_search;
1636 pat = PATTERN (trial);
1637 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1638 continue;
1640 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1642 bool annul_p = false;
1643 rtx control = seq->element (0);
1645 /* If this is a CALL_INSN and its delay slots, it is hard to track
1646 the resource needs properly, so give up. */
1647 if (CALL_P (control))
1648 return 0;
1650 /* If this is an INSN or JUMP_INSN with delayed effects, it
1651 is hard to track the resource needs properly, so give up. */
1653 #ifdef INSN_SETS_ARE_DELAYED
1654 if (INSN_SETS_ARE_DELAYED (control))
1655 return 0;
1656 #endif
1658 #ifdef INSN_REFERENCES_ARE_DELAYED
1659 if (INSN_REFERENCES_ARE_DELAYED (control))
1660 return 0;
1661 #endif
1663 if (JUMP_P (control))
1664 annul_p = INSN_ANNULLED_BRANCH_P (control);
1666 /* See if any of the insns in the delay slot match, updating
1667 resource requirements as we go. */
1668 for (i = seq->len () - 1; i > 0; i--)
1670 rtx candidate = seq->element (i);
1672 /* If an insn will be annulled if the branch is false, it isn't
1673 considered as a possible duplicate insn. */
1674 if (rtx_equal_p (PATTERN (candidate), ipat)
1675 && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1677 /* Show that this insn will be used in the sequel. */
1678 INSN_FROM_TARGET_P (candidate) = 0;
1679 return candidate;
1682 /* Unless this is an annulled insn from the target of a branch,
1683 we must stop if it sets anything needed or set by INSN. */
1684 if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1685 && insn_sets_resource_p (candidate, &needed, true))
1686 return 0;
1689 /* If the insn requiring the delay slot conflicts with INSN, we
1690 must stop. */
1691 if (insn_sets_resource_p (control, &needed, true))
1692 return 0;
1694 else
1696 /* See if TRIAL is the same as INSN. */
1697 pat = PATTERN (trial);
1698 if (rtx_equal_p (pat, ipat))
1699 return trial;
1701 /* Can't go any further if TRIAL conflicts with INSN. */
1702 if (insn_sets_resource_p (trial, &needed, true))
1703 return 0;
1707 return 0;
1710 /* Return 1 if THREAD can only be executed in one way. If LABEL is nonzero,
1711 it is the target of the branch insn being scanned. If ALLOW_FALLTHROUGH
1712 is nonzero, we are allowed to fall into this thread; otherwise, we are
1713 not.
1715 If LABEL is used more than once or we pass a label other than LABEL before
1716 finding an active insn, we do not own this thread. */
1718 static int
1719 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1721 rtx_insn *active_insn;
1722 rtx_insn *insn;
1724 /* We don't own the function end. */
1725 if (thread == 0 || ANY_RETURN_P (thread))
1726 return 0;
1728 /* We have a non-NULL insn. */
1729 rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1731 /* Get the first active insn, or THREAD_INSN, if it is an active insn. */
1732 active_insn = next_active_insn (PREV_INSN (thread_insn));
1734 for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1735 if (LABEL_P (insn)
1736 && (insn != label || LABEL_NUSES (insn) != 1))
1737 return 0;
1739 if (allow_fallthrough)
1740 return 1;
1742 /* Ensure that we reach a BARRIER before any insn or label. */
1743 for (insn = prev_nonnote_insn (thread_insn);
1744 insn == 0 || !BARRIER_P (insn);
1745 insn = prev_nonnote_insn (insn))
1746 if (insn == 0
1747 || LABEL_P (insn)
1748 || (NONJUMP_INSN_P (insn)
1749 && GET_CODE (PATTERN (insn)) != USE
1750 && GET_CODE (PATTERN (insn)) != CLOBBER))
1751 return 0;
1753 return 1;
1756 /* Called when INSN is being moved from a location near the target of a jump.
1757 We leave a marker of the form (use (INSN)) immediately in front
1758 of WHERE for mark_target_live_regs. These markers will be deleted when
1759 reorg finishes.
1761 We used to try to update the live status of registers if WHERE is at
1762 the start of a basic block, but that can't work since we may remove a
1763 BARRIER in relax_delay_slots. */
1765 static void
1766 update_block (rtx_insn *insn, rtx where)
1768 /* Ignore if this was in a delay slot and it came from the target of
1769 a branch. */
1770 if (INSN_FROM_TARGET_P (insn))
1771 return;
1773 emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1775 /* INSN might be making a value live in a block where it didn't use to
1776 be. So recompute liveness information for this block. */
1778 incr_ticks_for_insn (insn);
1781 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1782 the basic block containing the jump. */
1784 static int
1785 reorg_redirect_jump (rtx_insn *jump, rtx nlabel)
1787 incr_ticks_for_insn (jump);
1788 return redirect_jump (jump, nlabel, 1);
1791 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1792 We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1793 that reference values used in INSN. If we find one, then we move the
1794 REG_DEAD note to INSN.
1796 This is needed to handle the case where a later insn (after INSN) has a
1797 REG_DEAD note for a register used by INSN, and this later insn subsequently
1798 gets moved before a CODE_LABEL because it is a redundant insn. In this
1799 case, mark_target_live_regs may be confused into thinking the register
1800 is dead because it sees a REG_DEAD note immediately before a CODE_LABEL. */
1802 static void
1803 update_reg_dead_notes (rtx insn, rtx delayed_insn)
1805 rtx p, link, next;
1807 for (p = next_nonnote_insn (insn); p != delayed_insn;
1808 p = next_nonnote_insn (p))
1809 for (link = REG_NOTES (p); link; link = next)
1811 next = XEXP (link, 1);
1813 if (REG_NOTE_KIND (link) != REG_DEAD
1814 || !REG_P (XEXP (link, 0)))
1815 continue;
1817 if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1819 /* Move the REG_DEAD note from P to INSN. */
1820 remove_note (p, link);
1821 XEXP (link, 1) = REG_NOTES (insn);
1822 REG_NOTES (insn) = link;
1827 /* Called when an insn redundant with start_insn is deleted. If there
1828 is a REG_DEAD note for the target of start_insn between start_insn
1829 and stop_insn, then the REG_DEAD note needs to be deleted since the
1830 value no longer dies there.
1832 If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1833 confused into thinking the register is dead. */
1835 static void
1836 fix_reg_dead_note (rtx start_insn, rtx stop_insn)
1838 rtx p, link, next;
1840 for (p = next_nonnote_insn (start_insn); p != stop_insn;
1841 p = next_nonnote_insn (p))
1842 for (link = REG_NOTES (p); link; link = next)
1844 next = XEXP (link, 1);
1846 if (REG_NOTE_KIND (link) != REG_DEAD
1847 || !REG_P (XEXP (link, 0)))
1848 continue;
1850 if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1852 remove_note (p, link);
1853 return;
1858 /* Delete any REG_UNUSED notes that exist on INSN but not on REDUNDANT_INSN.
1860 This handles the case of udivmodXi4 instructions which optimize their
1861 output depending on whether any REG_UNUSED notes are present.
1862 We must make sure that INSN calculates as many results as REDUNDANT_INSN
1863 does. */
1865 static void
1866 update_reg_unused_notes (rtx insn, rtx redundant_insn)
1868 rtx link, next;
1870 for (link = REG_NOTES (insn); link; link = next)
1872 next = XEXP (link, 1);
1874 if (REG_NOTE_KIND (link) != REG_UNUSED
1875 || !REG_P (XEXP (link, 0)))
1876 continue;
1878 if (! find_regno_note (redundant_insn, REG_UNUSED,
1879 REGNO (XEXP (link, 0))))
1880 remove_note (insn, link);
1884 static vec <rtx> sibling_labels;
1886 /* Return the label before INSN, or put a new label there. If SIBLING is
1887 non-zero, it is another label associated with the new label (if any),
1888 typically the former target of the jump that will be redirected to
1889 the new label. */
1891 static rtx_insn *
1892 get_label_before (rtx_insn *insn, rtx sibling)
1894 rtx_insn *label;
1896 /* Find an existing label at this point
1897 or make a new one if there is none. */
1898 label = prev_nonnote_insn (insn);
1900 if (label == 0 || !LABEL_P (label))
1902 rtx prev = PREV_INSN (insn);
1904 label = gen_label_rtx ();
1905 emit_label_after (label, prev);
1906 LABEL_NUSES (label) = 0;
1907 if (sibling)
1909 sibling_labels.safe_push (label);
1910 sibling_labels.safe_push (sibling);
1913 return label;
1916 /* Scan a function looking for insns that need a delay slot and find insns to
1917 put into the delay slot.
1919 NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1920 as calls). We do these first since we don't want jump insns (that are
1921 easier to fill) to get the only insns that could be used for non-jump insns.
1922 When it is zero, only try to fill JUMP_INSNs.
1924 When slots are filled in this manner, the insns (including the
1925 delay_insn) are put together in a SEQUENCE rtx. In this fashion,
1926 it is possible to tell whether a delay slot has really been filled
1927 or not. `final' knows how to deal with this, by communicating
1928 through FINAL_SEQUENCE. */
1930 static void
1931 fill_simple_delay_slots (int non_jumps_p)
1933 rtx_insn *insn, *trial, *next_trial;
1934 rtx pat;
1935 int i;
1936 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1937 struct resources needed, set;
1938 int slots_to_fill, slots_filled;
1939 rtx_insn_list *delay_list;
1941 for (i = 0; i < num_unfilled_slots; i++)
1943 int flags;
1944 /* Get the next insn to fill. If it has already had any slots assigned,
1945 we can't do anything with it. Maybe we'll improve this later. */
1947 insn = unfilled_slots_base[i];
1948 if (insn == 0
1949 || INSN_DELETED_P (insn)
1950 || (NONJUMP_INSN_P (insn)
1951 && GET_CODE (PATTERN (insn)) == SEQUENCE)
1952 || (JUMP_P (insn) && non_jumps_p)
1953 || (!JUMP_P (insn) && ! non_jumps_p))
1954 continue;
1956 /* It may have been that this insn used to need delay slots, but
1957 now doesn't; ignore in that case. This can happen, for example,
1958 on the HP PA RISC, where the number of delay slots depends on
1959 what insns are nearby. */
1960 slots_to_fill = num_delay_slots (insn);
1962 /* Some machine descriptions have defined instructions to have
1963 delay slots only in certain circumstances which may depend on
1964 nearby insns (which change due to reorg's actions).
1966 For example, the PA port normally has delay slots for unconditional
1967 jumps.
1969 However, the PA port claims such jumps do not have a delay slot
1970 if they are immediate successors of certain CALL_INSNs. This
1971 allows the port to favor filling the delay slot of the call with
1972 the unconditional jump. */
1973 if (slots_to_fill == 0)
1974 continue;
1976 /* This insn needs, or can use, some delay slots. SLOTS_TO_FILL
1977 says how many. After initialization, first try optimizing
1979 call _foo          call _foo
1980 nop                add %o7,.-L1,%o7
1981 b,a L1
1984 If this case applies, the delay slot of the call is filled with
1985 the unconditional jump. This is done first to avoid having the
1986 delay slot of the call filled in the backward scan. Also, since
1987 the unconditional jump is likely to also have a delay slot, that
1988 insn must exist when it is subsequently scanned.
1990 This is tried on each insn with delay slots as some machines
1991 have insns which perform calls, but are not represented as
1992 CALL_INSNs. */
1994 slots_filled = 0;
1995 delay_list = 0;
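/* Compute information about the direction and condition of this jump, if
it is one; the eligible_for_delay tests below use these flags to decide
which candidate insns the machine description will accept. */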
1997 if (JUMP_P (insn))
1998 flags = get_jump_flags (insn, JUMP_LABEL (insn));
1999 else
2000 flags = get_jump_flags (insn, NULL_RTX);
2002 if ((trial = next_active_insn (insn))
2003 && JUMP_P (trial)
2004 && simplejump_p (trial)
2005 && eligible_for_delay (insn, slots_filled, trial, flags)
2006 && no_labels_between_p (insn, trial)
2007 && ! can_throw_internal (trial))
2009 rtx_insn **tmp;
2010 slots_filled++;
2011 delay_list = add_to_delay_list (trial, delay_list);
2013 /* TRIAL may have had its delay slot filled, then unfilled. When
2014 the delay slot is unfilled, TRIAL is placed back on the unfilled
2015 slots obstack. Unfortunately, it is placed on the end of the
2016 obstack, not in its original location. Therefore, we must search
2017 from entry i + 1 to the end of the unfilled slots obstack to
2018 try and find TRIAL. */
2019 tmp = &unfilled_slots_base[i + 1];
2020 while (*tmp != trial && tmp != unfilled_slots_next)
2021 tmp++;
2023 /* Remove the unconditional jump from consideration for delay slot
2024 filling and unthread it. */
2025 if (*tmp == trial)
2026 *tmp = 0;
2028 rtx_insn *next = NEXT_INSN (trial);
2029 rtx_insn *prev = PREV_INSN (trial);
2030 if (prev)
2031 SET_NEXT_INSN (prev) = next;
2032 if (next)
2033 SET_PREV_INSN (next) = prev;
2037 /* Now, scan backwards from the insn to search for a potential
2038 delay-slot candidate. Stop searching when a label or jump is hit.
2040 For each candidate, if it is to go into the delay slot (moved
2041 forward in execution sequence), it must not need or set any resources
2042 that were set by later insns and must not set any resources that
2043 are needed for those insns.
2045 The delay slot insn itself sets resources unless it is a call
2046 (in which case the called routine, not the insn itself, is doing
2047 the setting). */
2049 if (slots_filled < slots_to_fill)
2051 CLEAR_RESOURCE (&needed);
2052 CLEAR_RESOURCE (&set);
2053 mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
2054 mark_referenced_resources (insn, &needed, false);
2056 for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
2057 trial = next_trial)
2059 next_trial = prev_nonnote_insn (trial);
2061 /* This must be an INSN or CALL_INSN. */
2062 pat = PATTERN (trial);
2064 /* Stand-alone USE and CLOBBER are just for flow. */
2065 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2066 continue;
2068 /* Check for resource conflict first, to avoid unnecessary
2069 splitting. */
2070 if (! insn_references_resource_p (trial, &set, true)
2071 && ! insn_sets_resource_p (trial, &set, true)
2072 && ! insn_sets_resource_p (trial, &needed, true)
2073 #ifdef HAVE_cc0
2074 /* Can't separate set of cc0 from its use. */
2075 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2076 #endif
2077 && ! can_throw_internal (trial))
2079 trial = try_split (pat, trial, 1);
2080 next_trial = prev_nonnote_insn (trial);
2081 if (eligible_for_delay (insn, slots_filled, trial, flags))
2083 /* In this case, we are searching backward, so if we
2084 find insns to put on the delay list, we want
2085 to put them at the head, rather than the
2086 tail, of the list. */
2088 update_reg_dead_notes (trial, insn);
2089 delay_list = gen_rtx_INSN_LIST (VOIDmode,
2090 trial, delay_list);
2091 update_block (trial, trial);
2092 delete_related_insns (trial);
2093 if (slots_to_fill == ++slots_filled)
2094 break;
2095 continue;
2099 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2100 mark_referenced_resources (trial, &needed, true);
2104 /* If all needed slots haven't been filled, we come here. */
2106 /* Try to optimize case of jumping around a single insn. */
2107 #if defined(ANNUL_IFFALSE_SLOTS) || defined(ANNUL_IFTRUE_SLOTS)
2108 if (slots_filled != slots_to_fill
2109 && delay_list == 0
2110 && JUMP_P (insn)
2111 && (condjump_p (insn) || condjump_in_parallel_p (insn))
2112 && !ANY_RETURN_P (JUMP_LABEL (insn)))
2114 delay_list = optimize_skip (insn);
2115 if (delay_list)
2116 slots_filled += 1;
2118 #endif
2120 /* Try to get insns from beyond the insn needing the delay slot.
2121 These insns can neither set nor reference resources set in insns being
2122 skipped, cannot set resources in the insn being skipped, and, if this
2123 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2124 call might not return).
2126 There used to be code which continued past the target label if
2127 we saw all uses of the target label. This code did not work,
2128 because it failed to account for some instructions which were
2129 both annulled and marked as from the target. This can happen as a
2130 result of optimize_skip. Since this code was redundant with
2131 fill_eager_delay_slots anyways, it was just deleted. */
2133 if (slots_filled != slots_to_fill
2134 /* If this instruction could throw an exception which is
2135 caught in the same function, then it's not safe to fill
2136 the delay slot with an instruction from beyond this
2137 point. For example, consider:
2139 int i = 2;
2141 try {
2142 f();
2143 i = 3;
2144 } catch (...) {}
2146 return i;
2148 Even though `i' is a local variable, we must be sure not
2149 to put `i = 3' in the delay slot if `f' might throw an
2150 exception.
2152 Presumably, we should also check to see if we could get
2153 back to this function via `setjmp'. */
2154 && ! can_throw_internal (insn)
2155 && !JUMP_P (insn))
2157 int maybe_never = 0;
2158 rtx pat, trial_delay;
2160 CLEAR_RESOURCE (&needed);
2161 CLEAR_RESOURCE (&set);
2162 mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2163 mark_referenced_resources (insn, &needed, true);
2165 if (CALL_P (insn))
2166 maybe_never = 1;
2168 for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2169 trial = next_trial)
2171 next_trial = next_nonnote_insn (trial);
2173 /* This must be an INSN or CALL_INSN. */
2174 pat = PATTERN (trial);
2176 /* Stand-alone USE and CLOBBER are just for flow. */
2177 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2178 continue;
2180 /* If this already has filled delay slots, get the insn needing
2181 the delay slots. */
2182 if (GET_CODE (pat) == SEQUENCE)
2183 trial_delay = XVECEXP (pat, 0, 0);
2184 else
2185 trial_delay = trial;
2187 /* Stop our search when seeing a jump. */
2188 if (JUMP_P (trial_delay))
2189 break;
2191 /* See if we have a resource problem before we try to split. */
2192 if (GET_CODE (pat) != SEQUENCE
2193 && ! insn_references_resource_p (trial, &set, true)
2194 && ! insn_sets_resource_p (trial, &set, true)
2195 && ! insn_sets_resource_p (trial, &needed, true)
2196 #ifdef HAVE_cc0
2197 && ! (reg_mentioned_p (cc0_rtx, pat) && ! sets_cc0_p (pat))
2198 #endif
2199 && ! (maybe_never && may_trap_or_fault_p (pat))
2200 && (trial = try_split (pat, trial, 0))
2201 && eligible_for_delay (insn, slots_filled, trial, flags)
2202 && ! can_throw_internal (trial))
2204 next_trial = next_nonnote_insn (trial);
2205 delay_list = add_to_delay_list (trial, delay_list);
2206 #ifdef HAVE_cc0
2207 if (reg_mentioned_p (cc0_rtx, pat))
2208 link_cc0_insns (trial);
2209 #endif
2210 delete_related_insns (trial);
2211 if (slots_to_fill == ++slots_filled)
2212 break;
2213 continue;
2216 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2217 mark_referenced_resources (trial, &needed, true);
2219 /* Ensure we don't put insns between the setting of cc and the
2220 comparison by moving a setting of cc into an earlier delay
2221 slot since these insns could clobber the condition code. */
2222 set.cc = 1;
2224 /* If this is a call, we might not get here. */
2225 if (CALL_P (trial_delay))
2226 maybe_never = 1;
2229 /* If there are slots left to fill and our search was stopped by an
2230 unconditional branch, try the insn at the branch target. We can
2231 redirect the branch if it works.
2233 Don't do this if the insn at the branch target is a branch. */
2234 if (slots_to_fill != slots_filled
2235 && trial
2236 && jump_to_label_p (trial)
2237 && simplejump_p (trial)
2238 && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
2239 && ! (NONJUMP_INSN_P (next_trial)
2240 && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2241 && !JUMP_P (next_trial)
2242 && ! insn_references_resource_p (next_trial, &set, true)
2243 && ! insn_sets_resource_p (next_trial, &set, true)
2244 && ! insn_sets_resource_p (next_trial, &needed, true)
2245 #ifdef HAVE_cc0
2246 && ! reg_mentioned_p (cc0_rtx, PATTERN (next_trial))
2247 #endif
2248 && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2249 && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2250 && eligible_for_delay (insn, slots_filled, next_trial, flags)
2251 && ! can_throw_internal (trial))
2253 /* See comment in relax_delay_slots about necessity of using
2254 next_real_insn here. */
2255 rtx_insn *new_label = next_real_insn (next_trial);
2257 if (new_label != 0)
2258 new_label = get_label_before (new_label, JUMP_LABEL (trial));
2259 else
2260 new_label = find_end_label (simple_return_rtx);
2262 if (new_label)
2264 delay_list
2265 = add_to_delay_list (copy_delay_slot_insn (next_trial),
2266 delay_list);
2267 slots_filled++;
2268 reorg_redirect_jump (trial, new_label);
2273 /* If this is an unconditional jump, then try to get insns from the
2274 target of the jump. */
2275 if (JUMP_P (insn)
2276 && simplejump_p (insn)
2277 && slots_filled != slots_to_fill)
2278 delay_list
2279 = fill_slots_from_thread (insn, const_true_rtx,
2280 next_active_insn (JUMP_LABEL (insn)),
2281 NULL, 1, 1,
2282 own_thread_p (JUMP_LABEL (insn),
2283 JUMP_LABEL (insn), 0),
2284 slots_to_fill, &slots_filled,
2285 delay_list);
2287 if (delay_list)
2288 unfilled_slots_base[i]
2289 = emit_delay_sequence (insn, delay_list, slots_filled);
2291 if (slots_to_fill == slots_filled)
2292 unfilled_slots_base[i] = 0;
2294 note_delay_statistics (slots_filled, 0);
2298 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2299 return the ultimate label reached by any such chain of jumps.
2300 Return a suitable return rtx if the chain ultimately leads to a
2301 return instruction.
2302 If LABEL is not followed by a jump, return LABEL.
2303 If the chain loops or we can't find the end, return LABEL,
2304 since that tells the caller to avoid changing the insn.
2305 If the returned label is obtained by following a crossing jump,
2306 set *CROSSING to true, otherwise set it to false. */
2308 static rtx
2309 follow_jumps (rtx label, rtx jump, bool *crossing)
2311 rtx_insn *insn;
2312 rtx_insn *next;
2313 int depth;
2315 *crossing = false;
2316 if (ANY_RETURN_P (label))
2317 return label;
2319 rtx_insn *value = as_a <rtx_insn *> (label);
2321 for (depth = 0;
2322 (depth < 10
2323 && (insn = next_active_insn (value)) != 0
2324 && JUMP_P (insn)
2325 && JUMP_LABEL (insn) != NULL_RTX
2326 && ((any_uncondjump_p (insn) && onlyjump_p (insn))
2327 || ANY_RETURN_P (PATTERN (insn)))
2328 && (next = NEXT_INSN (insn))
2329 && BARRIER_P (next));
2330 depth++)
2332 rtx this_label_or_return = JUMP_LABEL (insn);
2334 /* If we have found a cycle, make the insn jump to itself. */
2335 if (this_label_or_return == label)
2336 return label;
2338 /* Cannot follow returns and cannot look through tablejumps. */
2339 if (ANY_RETURN_P (this_label_or_return))
2340 return this_label_or_return;
2342 rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2343 if (NEXT_INSN (this_label)
2344 && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2345 break;
2347 if (!targetm.can_follow_jump (jump, insn))
2348 break;
2349 if (!*crossing)
2350 *crossing = CROSSING_JUMP_P (jump);
2351 value = this_label;
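/* If DEPTH hit the limit above, the chain of jumps may loop or simply be
longer than we are prepared to follow; keep the original LABEL in that case. */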
2353 if (depth == 10)
2354 return label;
2355 return value;
2358 /* Try to find insns to place in delay slots.
2360 INSN is the jump needing SLOTS_TO_FILL delay slots. It tests CONDITION
2361 or is an unconditional branch if CONDITION is const_true_rtx.
2362 *PSLOTS_FILLED is updated with the number of slots that we have filled.
2364 THREAD is a flow-of-control thread: either the insns to be executed if the
2365 branch is true or those executed if it is false; THREAD_IF_TRUE says which.
2367 OPPOSITE_THREAD is the thread in the opposite direction. It is used
2368 to see if any potential delay slot insns set things needed there.
2370 LIKELY is nonzero if it is extremely likely that the branch will be
2371 taken and THREAD_IF_TRUE is set. This is used for the branch at the
2372 end of a loop back up to the top.
2374 OWN_THREAD is true if we are the only user of the
2375 thread. I.e., it is the fallthrough code of our jump or the target of the
2376 jump when we are the only jump going there.
2378 If OWN_THREAD is false, it must be the "true" thread of a jump. In that
2379 case, we can only take insns from the head of the thread for our delay
2380 slot. We then adjust the jump to point after the insns we have taken. */
2382 static rtx_insn_list *
2383 fill_slots_from_thread (rtx_insn *insn, rtx condition, rtx thread_or_return,
2384 rtx opposite_thread, int likely,
2385 int thread_if_true,
2386 int own_thread, int slots_to_fill,
2387 int *pslots_filled, rtx_insn_list *delay_list)
2389 rtx new_thread;
2390 struct resources opposite_needed, set, needed;
2391 rtx_insn *trial;
2392 int lose = 0;
2393 int must_annul = 0;
2394 int flags;
2396 /* Validate our arguments. */
2397 gcc_assert (condition != const_true_rtx || thread_if_true);
2398 gcc_assert (own_thread || thread_if_true);
2400 flags = get_jump_flags (insn, JUMP_LABEL (insn));
2402 /* If our thread is the end of the subroutine, we can't get any delay
2403 insns from that. */
2404 if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2405 return delay_list;
2407 rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2409 /* If this is an unconditional branch, nothing is needed at the
2410 opposite thread. Otherwise, compute what is needed there. */
2411 if (condition == const_true_rtx)
2412 CLEAR_RESOURCE (&opposite_needed);
2413 else
2414 mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2416 /* If the insn at THREAD can be split, do it here to avoid having to
2417 update THREAD and NEW_THREAD if it is done in the loop below. Also
2418 initialize NEW_THREAD. */
2420 new_thread = thread = try_split (PATTERN (thread), thread, 0);
2422 /* Scan insns at THREAD. We are looking for an insn that can be removed
2423 from THREAD (it neither sets nor references resources that were set
2424 ahead of it and it doesn't set anything needed by the insns ahead of
2425 it) and that either can be placed in an annulling insn or isn't
2426 needed at OPPOSITE_THREAD. */
2428 CLEAR_RESOURCE (&needed);
2429 CLEAR_RESOURCE (&set);
2431 /* If we do not own this thread, we must stop as soon as we find
2432 something that we can't put in a delay slot, since all we can do
2433 is branch into THREAD at a later point. Therefore, labels stop
2434 the search if this is not the `true' thread. */
2436 for (trial = thread;
2437 ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2438 trial = next_nonnote_insn (trial))
2440 rtx pat, old_trial;
2442 /* If we have passed a label, we no longer own this thread. */
2443 if (LABEL_P (trial))
2445 own_thread = 0;
2446 continue;
2449 pat = PATTERN (trial);
2450 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2451 continue;
2453 /* If TRIAL conflicts with the insns ahead of it, we lose. Also,
2454 don't separate or copy insns that set and use CC0. */
2455 if (! insn_references_resource_p (trial, &set, true)
2456 && ! insn_sets_resource_p (trial, &set, true)
2457 && ! insn_sets_resource_p (trial, &needed, true)
2458 #ifdef HAVE_cc0
2459 && ! (reg_mentioned_p (cc0_rtx, pat)
2460 && (! own_thread || ! sets_cc0_p (pat)))
2461 #endif
2462 && ! can_throw_internal (trial))
2464 rtx prior_insn;
2466 /* If TRIAL is redundant with some insn before INSN, we don't
2467 actually need to add it to the delay list; we can merely pretend
2468 we did. */
2469 if ((prior_insn = redundant_insn (trial, insn, delay_list)))
2471 fix_reg_dead_note (prior_insn, insn);
2472 if (own_thread)
2474 update_block (trial, thread);
2475 if (trial == thread)
2477 thread = next_active_insn (thread);
2478 if (new_thread == trial)
2479 new_thread = thread;
2482 delete_related_insns (trial);
2484 else
2486 update_reg_unused_notes (prior_insn, trial);
2487 new_thread = next_active_insn (trial);
2490 continue;
2493 /* There are two ways we can win: If TRIAL doesn't set anything
2494 needed at the opposite thread and can't trap, or if it can
2495 go into an annulled delay slot. */
2496 if (!must_annul
2497 && (condition == const_true_rtx
2498 || (! insn_sets_resource_p (trial, &opposite_needed, true)
2499 && ! may_trap_or_fault_p (pat)
2500 && ! RTX_FRAME_RELATED_P (trial))))
2502 old_trial = trial;
2503 trial = try_split (pat, trial, 0);
2504 if (new_thread == old_trial)
2505 new_thread = trial;
2506 if (thread == old_trial)
2507 thread = trial;
2508 pat = PATTERN (trial);
2509 if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2510 goto winner;
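/* Otherwise, either we have already committed to annulling, or TRIAL sets
something needed at OPPOSITE_THREAD or might trap, so it can only be placed
in an annulled delay slot. The `0 ||' construct below enables this path only
when the target defines the matching ANNUL_IFTRUE_SLOTS or ANNUL_IFFALSE_SLOTS
macro for this thread direction. */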
2512 else if (0
2513 #ifdef ANNUL_IFTRUE_SLOTS
2514 || ! thread_if_true
2515 #endif
2516 #ifdef ANNUL_IFFALSE_SLOTS
2517 || thread_if_true
2518 #endif
2521 old_trial = trial;
2522 trial = try_split (pat, trial, 0);
2523 if (new_thread == old_trial)
2524 new_thread = trial;
2525 if (thread == old_trial)
2526 thread = trial;
2527 pat = PATTERN (trial);
2528 if ((must_annul || delay_list == NULL) && (thread_if_true
2529 ? check_annul_list_true_false (0, delay_list)
2530 && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2531 : check_annul_list_true_false (1, delay_list)
2532 && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2534 rtx_insn *temp;
2536 must_annul = 1;
2537 winner:
2539 #ifdef HAVE_cc0
2540 if (reg_mentioned_p (cc0_rtx, pat))
2541 link_cc0_insns (trial);
2542 #endif
2544 /* If we own this thread, delete the insn. If this is the
2545 destination of a branch, show that a basic block status
2546 may have been updated. In any case, mark the new
2547 starting point of this thread. */
2548 if (own_thread)
2550 rtx note;
2552 update_block (trial, thread);
2553 if (trial == thread)
2555 thread = next_active_insn (thread);
2556 if (new_thread == trial)
2557 new_thread = thread;
2560 /* We are moving this insn, not deleting it. We must
2561 temporarily increment the use count on any referenced
2562 label lest it be deleted by delete_related_insns. */
2563 for (note = REG_NOTES (trial);
2564 note != NULL_RTX;
2565 note = XEXP (note, 1))
2566 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2567 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2569 /* REG_LABEL_OPERAND could be
2570 NOTE_INSN_DELETED_LABEL too. */
2571 if (LABEL_P (XEXP (note, 0)))
2572 LABEL_NUSES (XEXP (note, 0))++;
2573 else
2574 gcc_assert (REG_NOTE_KIND (note)
2575 == REG_LABEL_OPERAND);
2577 if (jump_to_label_p (trial))
2578 LABEL_NUSES (JUMP_LABEL (trial))++;
2580 delete_related_insns (trial);
2582 for (note = REG_NOTES (trial);
2583 note != NULL_RTX;
2584 note = XEXP (note, 1))
2585 if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2586 || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2588 /* REG_LABEL_OPERAND could be
2589 NOTE_INSN_DELETED_LABEL too. */
2590 if (LABEL_P (XEXP (note, 0)))
2591 LABEL_NUSES (XEXP (note, 0))--;
2592 else
2593 gcc_assert (REG_NOTE_KIND (note)
2594 == REG_LABEL_OPERAND);
2596 if (jump_to_label_p (trial))
2597 LABEL_NUSES (JUMP_LABEL (trial))--;
2599 else
2600 new_thread = next_active_insn (trial);
2602 temp = own_thread ? trial : copy_delay_slot_insn (trial);
2603 if (thread_if_true)
2604 INSN_FROM_TARGET_P (temp) = 1;
2606 delay_list = add_to_delay_list (temp, delay_list);
2608 if (slots_to_fill == ++(*pslots_filled))
2610 /* Even though we have filled all the slots, we
2611 may be branching to a location that has a
2612 redundant insn. Skip any if so. */
2613 while (new_thread && ! own_thread
2614 && ! insn_sets_resource_p (new_thread, &set, true)
2615 && ! insn_sets_resource_p (new_thread, &needed,
2616 true)
2617 && ! insn_references_resource_p (new_thread,
2618 &set, true)
2619 && (prior_insn
2620 = redundant_insn (new_thread, insn,
2621 delay_list)))
2623 /* We know we do not own the thread, so no need
2624 to call update_block and delete_insn. */
2625 fix_reg_dead_note (prior_insn, insn);
2626 update_reg_unused_notes (prior_insn, new_thread);
2627 new_thread = next_active_insn (new_thread);
2629 break;
2632 continue;
2637 /* This insn can't go into a delay slot. */
2638 lose = 1;
2639 mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2640 mark_referenced_resources (trial, &needed, true);
2642 /* Ensure we don't put insns between the setting of cc and the comparison
2643 by moving a setting of cc into an earlier delay slot since these insns
2644 could clobber the condition code. */
2645 set.cc = 1;
2647 /* If this insn is a register-register copy and the next insn has
2648 a use of our destination, change it to use our source. That way,
2649 it will become a candidate for our delay slot the next time
2650 through this loop. This case occurs commonly in loops that
2651 scan a list.
2653 We could check for more complex cases than those tested below,
2654 but it doesn't seem worth it. It might also be a good idea to try
2655 to swap the two insns. That might do better.
2657 We can't do this if the next insn modifies our destination, because
2658 that would make the replacement into the insn invalid. We also can't
2659 do this if it modifies our source, because it might be an earlyclobber
2660 operand. This latter test also prevents updating the contents of
2661 a PRE_INC. We also can't do this if there's overlap of source and
2662 destination. Overlap may happen for larger-than-register-size modes. */
2664 if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2665 && REG_P (SET_SRC (pat))
2666 && REG_P (SET_DEST (pat))
2667 && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2669 rtx next = next_nonnote_insn (trial);
2671 if (next && NONJUMP_INSN_P (next)
2672 && GET_CODE (PATTERN (next)) != USE
2673 && ! reg_set_p (SET_DEST (pat), next)
2674 && ! reg_set_p (SET_SRC (pat), next)
2675 && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2676 && ! modified_in_p (SET_DEST (pat), next))
2677 validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2681 /* If we stopped on a branch insn that has delay slots, see if we can
2682 steal some of the insns in those slots. */
2683 if (trial && NONJUMP_INSN_P (trial)
2684 && GET_CODE (PATTERN (trial)) == SEQUENCE
2685 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2687 rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2688 /* If this is the `true' thread, we will want to follow the jump,
2689 so we can only do this if we have taken everything up to here. */
2690 if (thread_if_true && trial == new_thread)
2692 delay_list
2693 = steal_delay_list_from_target (insn, condition, sequence,
2694 delay_list, &set, &needed,
2695 &opposite_needed, slots_to_fill,
2696 pslots_filled, &must_annul,
2697 &new_thread);
2698 /* If we owned the thread and are told that it branched
2699 elsewhere, make sure we own the thread at the new location. */
2700 if (own_thread && trial != new_thread)
2701 own_thread = own_thread_p (new_thread, new_thread, 0);
2703 else if (! thread_if_true)
2704 delay_list
2705 = steal_delay_list_from_fallthrough (insn, condition,
2706 sequence,
2707 delay_list, &set, &needed,
2708 &opposite_needed, slots_to_fill,
2709 pslots_filled, &must_annul);
2712 /* If we haven't found anything for this delay slot and it is very
2713 likely that the branch will be taken, see if the insn at our target
2714 increments or decrements a register with an increment that does not
2715 depend on the destination register. If so, try to place the opposite
2716 arithmetic insn after the jump insn and put the arithmetic insn in the
2717 delay slot. If we can't do this, return. */
2718 if (delay_list == 0 && likely
2719 && new_thread && !ANY_RETURN_P (new_thread)
2720 && NONJUMP_INSN_P (new_thread)
2721 && !RTX_FRAME_RELATED_P (new_thread)
2722 && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2723 && asm_noperands (PATTERN (new_thread)) < 0)
2725 rtx pat = PATTERN (new_thread);
2726 rtx dest;
2727 rtx src;
2729 /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2730 above. */
2731 trial = as_a <rtx_insn *> (new_thread);
2732 pat = PATTERN (trial);
2734 if (!NONJUMP_INSN_P (trial)
2735 || GET_CODE (pat) != SET
2736 || ! eligible_for_delay (insn, 0, trial, flags)
2737 || can_throw_internal (trial))
2738 return 0;
2740 dest = SET_DEST (pat), src = SET_SRC (pat);
2741 if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2742 && rtx_equal_p (XEXP (src, 0), dest)
2743 && (!FLOAT_MODE_P (GET_MODE (src))
2744 || flag_unsafe_math_optimizations)
2745 && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2746 && ! side_effects_p (pat))
2748 rtx other = XEXP (src, 1);
2749 rtx new_arith;
2750 rtx_insn *ninsn;
2752 /* If this is a constant adjustment, use the same code with
2753 the negated constant. Otherwise, reverse the sense of the
2754 arithmetic. */
2755 if (CONST_INT_P (other))
2756 new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2757 negate_rtx (GET_MODE (src), other));
2758 else
2759 new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2760 GET_MODE (src), dest, other);
2762 ninsn = emit_insn_after (gen_rtx_SET (VOIDmode, dest, new_arith),
2763 insn);
2765 if (recog_memoized (ninsn) < 0
2766 || (extract_insn (ninsn), ! constrain_operands (1)))
2768 delete_related_insns (ninsn);
2769 return 0;
2772 if (own_thread)
2774 update_block (trial, thread);
2775 if (trial == thread)
2777 thread = next_active_insn (thread);
2778 if (new_thread == trial)
2779 new_thread = thread;
2781 delete_related_insns (trial);
2783 else
2784 new_thread = next_active_insn (trial);
2786 ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2787 if (thread_if_true)
2788 INSN_FROM_TARGET_P (ninsn) = 1;
2790 delay_list = add_to_delay_list (ninsn, NULL);
2791 (*pslots_filled)++;
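/* If any insn we placed can only be executed when the branch annuls it on
the other path, mark INSN as an annulling branch. */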
2795 if (delay_list && must_annul)
2796 INSN_ANNULLED_BRANCH_P (insn) = 1;
2798 /* If we are to branch into the middle of this thread, find an appropriate
2799 label or make a new one if none, and redirect INSN to it. If we hit the
2800 end of the function, use the end-of-function label. */
2801 if (new_thread != thread)
2803 rtx label;
2804 bool crossing = false;
2806 gcc_assert (thread_if_true);
2808 if (new_thread && simplejump_or_return_p (new_thread)
2809 && redirect_with_delay_list_safe_p (insn,
2810 JUMP_LABEL (new_thread),
2811 delay_list))
2812 new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2813 &crossing);
2815 if (ANY_RETURN_P (new_thread))
2816 label = find_end_label (new_thread);
2817 else if (LABEL_P (new_thread))
2818 label = new_thread;
2819 else
2820 label = get_label_before (as_a <rtx_insn *> (new_thread),
2821 JUMP_LABEL (insn));
2823 if (label)
2825 reorg_redirect_jump (insn, label);
2826 if (crossing)
2827 CROSSING_JUMP_P (insn) = 1;
2831 return delay_list;
2834 /* Make another attempt to find insns to place in delay slots.
2836 We previously looked for insns located in front of the delay insn
2837 and, for non-jump delay insns, located behind the delay insn.
2839 Here only try to schedule jump insns and try to move insns from either
2840 the target or the following insns into the delay slot. If annulling is
2841 supported, we are likely to do this. Otherwise, we can do this only
2842 if safe. */
2844 static void
2845 fill_eager_delay_slots (void)
2847 rtx_insn *insn;
2848 int i;
2849 int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2851 for (i = 0; i < num_unfilled_slots; i++)
2853 rtx condition;
2854 rtx target_label, insn_at_target;
2855 rtx_insn *fallthrough_insn;
2856 rtx_insn_list *delay_list = 0;
2857 int own_target;
2858 int own_fallthrough;
2859 int prediction, slots_to_fill, slots_filled;
2861 insn = unfilled_slots_base[i];
2862 if (insn == 0
2863 || INSN_DELETED_P (insn)
2864 || !JUMP_P (insn)
2865 || ! (condjump_p (insn) || condjump_in_parallel_p (insn)))
2866 continue;
2868 slots_to_fill = num_delay_slots (insn);
2869 /* Some machine descriptions have defined instructions to have
2870 delay slots only in certain circumstances which may depend on
2871 nearby insns (which change due to reorg's actions).
2873 For example, the PA port normally has delay slots for unconditional
2874 jumps.
2876 However, the PA port claims such jumps do not have a delay slot
2877 if they are immediate successors of certain CALL_INSNs. This
2878 allows the port to favor filling the delay slot of the call with
2879 the unconditional jump. */
2880 if (slots_to_fill == 0)
2881 continue;
2883 slots_filled = 0;
2884 target_label = JUMP_LABEL (insn);
2885 condition = get_branch_condition (insn, target_label);
2887 if (condition == 0)
2888 continue;
2890 /* Get the next active fallthrough and target insns and see if we own
2891 them. Then see whether the branch is likely true. We don't need
2892 to do a lot of this for unconditional branches. */
2894 insn_at_target = first_active_target_insn (target_label);
2895 own_target = own_thread_p (target_label, target_label, 0);
2897 if (condition == const_true_rtx)
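/* An unconditional branch has no fallthrough thread to own, and is treated
as certain to be taken (prediction 2), so only the target thread is tried. */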
2899 own_fallthrough = 0;
2900 fallthrough_insn = 0;
2901 prediction = 2;
2903 else
2905 fallthrough_insn = next_active_insn (insn);
2906 own_fallthrough = own_thread_p (NEXT_INSN (insn), NULL_RTX, 1);
2907 prediction = mostly_true_jump (insn);
2910 /* If this insn is expected to branch, first try to get insns from our
2911 target, then our fallthrough insns. If it is not expected to branch,
2912 try the other order. */
2914 if (prediction > 0)
2916 delay_list
2917 = fill_slots_from_thread (insn, condition, insn_at_target,
2918 fallthrough_insn, prediction == 2, 1,
2919 own_target,
2920 slots_to_fill, &slots_filled, delay_list);
2922 if (delay_list == 0 && own_fallthrough)
2924 /* Even though we didn't find anything for delay slots,
2925 we might have found a redundant insn which we deleted
2926 from the thread that was filled. So we have to recompute
2927 the next insn at the target. */
2928 target_label = JUMP_LABEL (insn);
2929 insn_at_target = first_active_target_insn (target_label);
2931 delay_list
2932 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2933 insn_at_target, 0, 0,
2934 own_fallthrough,
2935 slots_to_fill, &slots_filled,
2936 delay_list);
2939 else
2941 if (own_fallthrough)
2942 delay_list
2943 = fill_slots_from_thread (insn, condition, fallthrough_insn,
2944 insn_at_target, 0, 0,
2945 own_fallthrough,
2946 slots_to_fill, &slots_filled,
2947 delay_list);
2949 if (delay_list == 0)
2950 delay_list
2951 = fill_slots_from_thread (insn, condition, insn_at_target,
2952 next_active_insn (insn), 0, 1,
2953 own_target,
2954 slots_to_fill, &slots_filled,
2955 delay_list);
2958 if (delay_list)
2959 unfilled_slots_base[i]
2960 = emit_delay_sequence (insn, delay_list, slots_filled);
2962 if (slots_to_fill == slots_filled)
2963 unfilled_slots_base[i] = 0;
2965 note_delay_statistics (slots_filled, 1);
2969 static void delete_computation (rtx insn);
2971 /* Recursively delete prior insns that compute the value (used only by INSN
2972 which the caller is deleting) stored in the register mentioned by NOTE
2973 which is a REG_DEAD note associated with INSN. */
2975 static void
2976 delete_prior_computation (rtx note, rtx insn)
2978 rtx our_prev;
2979 rtx reg = XEXP (note, 0);
2981 for (our_prev = prev_nonnote_insn (insn);
2982 our_prev && (NONJUMP_INSN_P (our_prev)
2983 || CALL_P (our_prev));
2984 our_prev = prev_nonnote_insn (our_prev))
2986 rtx pat = PATTERN (our_prev);
2988 /* If we reach a CALL which is not calling a const function
2989 or the callee pops the arguments, then give up. */
2990 if (CALL_P (our_prev)
2991 && (! RTL_CONST_CALL_P (our_prev)
2992 || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2993 break;
2995 /* If we reach a SEQUENCE, it is too complex to try to
2996 do anything with it, so give up. We can be run during
2997 and after reorg, so SEQUENCE rtl can legitimately show
2998 up here. */
2999 if (GET_CODE (pat) == SEQUENCE)
3000 break;
3002 if (GET_CODE (pat) == USE
3003 && NONJUMP_INSN_P (XEXP (pat, 0)))
3004 /* reorg creates USEs that look like this. We leave them
3005 alone because reorg needs them for its own purposes. */
3006 break;
3008 if (reg_set_p (reg, pat))
3010 if (side_effects_p (pat) && !CALL_P (our_prev))
3011 break;
3013 if (GET_CODE (pat) == PARALLEL)
3015 /* If we find a SET of something else, we can't
3016 delete the insn. */
3018 int i;
3020 for (i = 0; i < XVECLEN (pat, 0); i++)
3022 rtx part = XVECEXP (pat, 0, i);
3024 if (GET_CODE (part) == SET
3025 && SET_DEST (part) != reg)
3026 break;
3029 if (i == XVECLEN (pat, 0))
3030 delete_computation (our_prev);
3032 else if (GET_CODE (pat) == SET
3033 && REG_P (SET_DEST (pat)))
3035 int dest_regno = REGNO (SET_DEST (pat));
3036 int dest_endregno = END_REGNO (SET_DEST (pat));
3037 int regno = REGNO (reg);
3038 int endregno = END_REGNO (reg);
3040 if (dest_regno >= regno
3041 && dest_endregno <= endregno)
3042 delete_computation (our_prev);
3044 /* We may have a multi-word hard register and some, but not
3045 all, of the words of the register are needed in subsequent
3046 insns. Write REG_UNUSED notes for those parts that were not
3047 needed. */
3048 else if (dest_regno <= regno
3049 && dest_endregno >= endregno)
3051 int i;
3053 add_reg_note (our_prev, REG_UNUSED, reg);
3055 for (i = dest_regno; i < dest_endregno; i++)
3056 if (! find_regno_note (our_prev, REG_UNUSED, i))
3057 break;
3059 if (i == dest_endregno)
3060 delete_computation (our_prev);
3064 break;
3067 /* If PAT references the register that dies here, it is an
3068 additional use. Hence any prior SET isn't dead. However, this
3069 insn becomes the new place for the REG_DEAD note. */
3070 if (reg_overlap_mentioned_p (reg, pat))
3072 XEXP (note, 1) = REG_NOTES (our_prev);
3073 REG_NOTES (our_prev) = note;
3074 break;
3079 /* Delete INSN and recursively delete insns that compute values used only
3080 by INSN. This uses the REG_DEAD notes computed during flow analysis.
3082 Look at all our REG_DEAD notes. If a previous insn does nothing other
3083 than set a register that dies in this insn, we can delete that insn
3084 as well.
3086 On machines with CC0, if CC0 is used in this insn, we may be able to
3087 delete the insn that set it. */
3089 static void
3090 delete_computation (rtx insn)
3092 rtx note, next;
3094 #ifdef HAVE_cc0
3095 if (reg_referenced_p (cc0_rtx, PATTERN (insn)))
3097 rtx prev = prev_nonnote_insn (insn);
3098 /* We assume that at this stage
3099 CC's are always set explicitly
3100 and always immediately before the jump that
3101 will use them. So if the previous insn
3102 exists to set the CC's, delete it
3103 (unless it performs auto-increments, etc.). */
3104 if (prev && NONJUMP_INSN_P (prev)
3105 && sets_cc0_p (PATTERN (prev)))
3107 if (sets_cc0_p (PATTERN (prev)) > 0
3108 && ! side_effects_p (PATTERN (prev)))
3109 delete_computation (prev);
3110 else
3111 /* Otherwise, show that cc0 won't be used. */
3112 add_reg_note (prev, REG_UNUSED, cc0_rtx);
3115 #endif
3117 for (note = REG_NOTES (insn); note; note = next)
3119 next = XEXP (note, 1);
3121 if (REG_NOTE_KIND (note) != REG_DEAD
3122 /* Verify that the REG_NOTE is legitimate. */
3123 || !REG_P (XEXP (note, 0)))
3124 continue;
3126 delete_prior_computation (note, insn);
3129 delete_related_insns (insn);
3132 /* If all INSN does is set the pc, delete it,
3133 and delete the insn that set the condition codes for it
3134 if that's what the previous thing was. */
3136 static void
3137 delete_jump (rtx insn)
3139 rtx set = single_set (insn);
3141 if (set && GET_CODE (SET_DEST (set)) == PC)
3142 delete_computation (insn);
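/* Scan backwards from the first active insn after X and return the first
CODE_LABEL seen, or NULL if SCAN_LIMIT or the start of the insn chain is
reached first. */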
3145 static rtx_insn *
3146 label_before_next_insn (rtx x, rtx scan_limit)
3148 rtx_insn *insn = next_active_insn (x);
3149 while (insn)
3151 insn = PREV_INSN (insn);
3152 if (insn == scan_limit || insn == NULL_RTX)
3153 return NULL;
3154 if (LABEL_P (insn))
3155 break;
3157 return insn;
3161 /* Once we have tried two ways to fill a delay slot, make a pass over the
3162 code to try to improve the results and to do such things as more jump
3163 threading. */
3165 static void
3166 relax_delay_slots (rtx_insn *first)
3168 rtx_insn *insn, *next;
3169 rtx_sequence *pat;
3170 rtx trial;
3171 rtx_insn *delay_insn;
3172 rtx target_label;
3174 /* Look at every JUMP_INSN and see if we can improve it. */
3175 for (insn = first; insn; insn = next)
3177 rtx_insn *other;
3178 bool crossing;
3180 next = next_active_insn (insn);
3182 /* If this is a jump insn, see if it now jumps to a jump, jumps to
3183 the next insn, or jumps to a label that is not the last of a
3184 group of consecutive labels. */
3185 if (JUMP_P (insn)
3186 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3187 && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3189 target_label
3190 = skip_consecutive_labels (follow_jumps (target_label, insn,
3191 &crossing));
3192 if (ANY_RETURN_P (target_label))
3193 target_label = find_end_label (target_label);
3195 if (target_label && next_active_insn (target_label) == next
3196 && ! condjump_in_parallel_p (insn))
3198 delete_jump (insn);
3199 continue;
3202 if (target_label && target_label != JUMP_LABEL (insn))
3204 reorg_redirect_jump (insn, target_label);
3205 if (crossing)
3206 CROSSING_JUMP_P (insn) = 1;
3209 /* See if this jump conditionally branches around an unconditional
3210 jump. If so, invert this jump and point it to the target of the
3211 second jump. */
3212 if (next && simplejump_or_return_p (next)
3213 && any_condjump_p (insn)
3214 && target_label
3215 && next_active_insn (target_label) == next_active_insn (next)
3216 && no_labels_between_p (insn, next))
3218 rtx label = JUMP_LABEL (next);
3220 /* Be careful how we do this to avoid deleting code or
3221 labels that are momentarily dead. See similar optimization
3222 in jump.c.
3224 We also need to ensure we properly handle the case when
3225 invert_jump fails. */
3227 ++LABEL_NUSES (target_label);
3228 if (!ANY_RETURN_P (label))
3229 ++LABEL_NUSES (label);
3231 if (invert_jump (insn, label, 1))
3233 delete_related_insns (next);
3234 next = insn;
3237 if (!ANY_RETURN_P (label))
3238 --LABEL_NUSES (label);
3240 if (--LABEL_NUSES (target_label) == 0)
3241 delete_related_insns (target_label);
3243 continue;
3247 /* If this is an unconditional jump and the previous insn is a
3248 conditional jump, try reversing the condition of the previous
3249 insn and swapping our targets. The next pass might be able to
3250 fill the slots.
3252 Don't do this if we expect the conditional branch to be true, because
3253 we would then be making the more common case longer. */
3255 if (simplejump_or_return_p (insn)
3256 && (other = prev_active_insn (insn)) != 0
3257 && any_condjump_p (other)
3258 && no_labels_between_p (other, insn)
3259 && 0 > mostly_true_jump (other))
3261 rtx other_target = JUMP_LABEL (other);
3262 target_label = JUMP_LABEL (insn);
3264 if (invert_jump (other, target_label, 0))
3265 reorg_redirect_jump (insn, other_target);
3268 /* Now look only at cases where we have a filled delay slot. */
3269 if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3270 continue;
3272 pat = as_a <rtx_sequence *> (PATTERN (insn));
3273 delay_insn = pat->insn (0);
3275 /* See if the first insn in the delay slot is redundant with some
3276 previous insn. Remove it from the delay slot if so; then set up
3277 to reprocess this insn. */
3278 if (redundant_insn (pat->insn (1), delay_insn, 0))
3280 update_block (pat->insn (1), insn);
3281 delete_from_delay_slot (pat->insn (1));
3282 next = prev_active_insn (next);
3283 continue;
3286 /* See if we have a RETURN insn with a filled delay slot followed
3287 by a RETURN insn with an unfilled delay slot. If so, we can delete
3288 the first RETURN (but not its delay insn). This gives the same
3289 effect in fewer instructions.
3291 Only do so if optimizing for size since this results in slower, but
3292 smaller code. */
3293 if (optimize_function_for_size_p (cfun)
3294 && ANY_RETURN_P (PATTERN (delay_insn))
3295 && next
3296 && JUMP_P (next)
3297 && PATTERN (next) == PATTERN (delay_insn))
3299 rtx after;
3300 int i;
3302 /* Delete the RETURN and just execute the delay list insns.
3304 We do this by deleting the INSN containing the SEQUENCE, then
3305 re-emitting the insns separately, and then deleting the RETURN.
3306 This allows the count of the jump target to be properly
3307 decremented.
3309 Note that we need to change the INSN_UID of the re-emitted insns
3310 since it is used to hash the insns for mark_target_live_regs and
3311 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3313 Clear the from target bit, since these insns are no longer
3314 in delay slots. */
3315 for (i = 0; i < XVECLEN (pat, 0); i++)
3316 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3318 trial = PREV_INSN (insn);
3319 delete_related_insns (insn);
3320 gcc_assert (GET_CODE (pat) == SEQUENCE);
3321 add_insn_after (delay_insn, trial, NULL);
3322 after = delay_insn;
3323 for (i = 1; i < XVECLEN (pat, 0); i++)
3324 after = emit_copy_of_insn_after (XVECEXP (pat, 0, i), after);
3325 delete_scheduled_jump (delay_insn);
3326 continue;
3329 /* Now look only at the cases where we have a filled JUMP_INSN. */
3330 if (!JUMP_P (delay_insn)
3331 || !(condjump_p (delay_insn) || condjump_in_parallel_p (delay_insn)))
3332 continue;
3334 target_label = JUMP_LABEL (delay_insn);
3335 if (target_label && ANY_RETURN_P (target_label))
3336 continue;
3338 /* If this jump goes to another unconditional jump, thread it, but
3339 don't convert a jump into a RETURN here. */
3340 trial = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
3341 &crossing));
3342 if (ANY_RETURN_P (trial))
3343 trial = find_end_label (trial);
3345 if (trial && trial != target_label
3346 && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
3348 reorg_redirect_jump (delay_insn, trial);
3349 target_label = trial;
3350 if (crossing)
3351 CROSSING_JUMP_P (insn) = 1;
3354 /* If the first insn at TARGET_LABEL is redundant with a previous
3355 insn, redirect the jump to the following insn and process again.
3356 We use next_real_insn instead of next_active_insn so we
3357 don't skip USE-markers, or we'll end up with incorrect
3358 liveness info. */
3359 trial = next_real_insn (target_label);
3360 if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3361 && redundant_insn (trial, insn, 0)
3362 && ! can_throw_internal (trial))
3364 /* Figure out where to emit the special USE insn so we don't
3365 later incorrectly compute register live/death info. */
3366 rtx_insn *tmp = next_active_insn (trial);
3367 if (tmp == 0)
3368 tmp = find_end_label (simple_return_rtx);
3370 if (tmp)
3372 /* Insert the special USE insn and update dataflow info.
3373 We know "trial" is an insn here as it is the output of
3374 next_real_insn () above. */
3375 update_block (as_a <rtx_insn *> (trial), tmp);
3377 /* Now emit a label before the special USE insn, and
3378 redirect our jump to the new label. */
3379 target_label = get_label_before (PREV_INSN (tmp), target_label);
3380 reorg_redirect_jump (delay_insn, target_label);
3381 next = insn;
3382 continue;
3386 /* Similarly, if it is an unconditional jump with one insn in its
3387 delay list and that insn is redundant, thread the jump. */
3388 if (trial && GET_CODE (PATTERN (trial)) == SEQUENCE
3389 && XVECLEN (PATTERN (trial), 0) == 2
3390 && JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
3391 && simplejump_or_return_p (XVECEXP (PATTERN (trial), 0, 0))
3392 && redundant_insn (XVECEXP (PATTERN (trial), 0, 1), insn, 0))
3394 rtx_sequence *trial_seq = as_a <rtx_sequence *> (PATTERN (trial));
3395 target_label = JUMP_LABEL (trial_seq->insn (0));
3396 if (ANY_RETURN_P (target_label))
3397 target_label = find_end_label (target_label);
3399 if (target_label
3400 && redirect_with_delay_slots_safe_p (delay_insn, target_label,
3401 insn))
3403 update_block (trial_seq->insn (1), insn);
3404 reorg_redirect_jump (delay_insn, target_label);
3405 next = insn;
3406 continue;
3410 /* See if we have a simple (conditional) jump that is useless. */
3411 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3412 && ! condjump_in_parallel_p (delay_insn)
3413 && prev_active_insn (target_label) == insn
3414 && ! BARRIER_P (prev_nonnote_insn (target_label))
3415 #ifdef HAVE_cc0
3416 /* If the last insn in the delay slot sets CC0 for some insn,
3417 various code assumes that it is in a delay slot. We could
3418 put it back where it belonged and delete the register notes,
3419 but it doesn't seem worthwhile in this uncommon case. */
3420 && ! find_reg_note (XVECEXP (pat, 0, XVECLEN (pat, 0) - 1),
3421 REG_CC_USER, NULL_RTX)
3422 #endif
3425 rtx after;
3426 int i;
3428 /* All this insn does is execute its delay list and jump to the
3429 following insn. So delete the jump and just execute the delay
3430 list insns.
3432 We do this by deleting the INSN containing the SEQUENCE, then
3433 re-emitting the insns separately, and then deleting the jump.
3434 This allows the count of the jump target to be properly
3435 decremented.
3437 Note that we need to change the INSN_UID of the re-emitted insns
3438 since it is used to hash the insns for mark_target_live_regs and
3439 the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3441 Clear the from target bit, since these insns are no longer
3442 in delay slots. */
3443 for (i = 0; i < XVECLEN (pat, 0); i++)
3444 INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3446 trial = PREV_INSN (insn);
3447 delete_related_insns (insn);
3448 gcc_assert (GET_CODE (pat) == SEQUENCE);
3449 add_insn_after (delay_insn, trial, NULL);
3450 after = delay_insn;
3451 for (i = 1; i < XVECLEN (pat, 0); i++)
3452 after = emit_copy_of_insn_after (XVECEXP (pat, 0, i), after);
3453 delete_scheduled_jump (delay_insn);
3454 continue;
3457 /* See if this is an unconditional jump around a single insn which is
3458 identical to the one in its delay slot. In this case, we can just
3459 delete the branch and the insn in its delay slot. */
3460 if (next && NONJUMP_INSN_P (next)
3461 && label_before_next_insn (next, insn) == target_label
3462 && simplejump_p (insn)
3463 && XVECLEN (pat, 0) == 2
3464 && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3466 delete_related_insns (insn);
3467 continue;
3470 /* See if this jump (with its delay slots) conditionally branches
3471 around an unconditional jump (without delay slots). If so, invert
3472 this jump and point it to the target of the second jump. We cannot
3473 do this for annulled jumps, though. Again, don't convert a jump to
3474 a RETURN here. */
3475 if (! INSN_ANNULLED_BRANCH_P (delay_insn)
3476 && any_condjump_p (delay_insn)
3477 && next && simplejump_or_return_p (next)
3478 && next_active_insn (target_label) == next_active_insn (next)
3479 && no_labels_between_p (insn, next))
3481 rtx label = JUMP_LABEL (next);
3482 rtx old_label = JUMP_LABEL (delay_insn);
3484 if (ANY_RETURN_P (label))
3485 label = find_end_label (label);
3487 /* find_end_label can generate a new label. Check this first. */
3488 if (label
3489 && no_labels_between_p (insn, next)
3490 && redirect_with_delay_slots_safe_p (delay_insn, label, insn))
3492 /* Be careful how we do this to avoid deleting code or labels
3493 that are momentarily dead. See the similar optimization in
3494 jump.c. */
3495 if (old_label)
3496 ++LABEL_NUSES (old_label);
3498 if (invert_jump (delay_insn, label, 1))
3500 int i;
3502 /* Must update the INSN_FROM_TARGET_P bits now that
3503 the branch is reversed, so that mark_target_live_regs
3504 will handle the delay slot insn correctly. */
3505 for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3507 rtx slot = XVECEXP (PATTERN (insn), 0, i);
3508 INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3511 delete_related_insns (next);
3512 next = insn;
3515 if (old_label && --LABEL_NUSES (old_label) == 0)
3516 delete_related_insns (old_label);
3517 continue;
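/* Schematically, the inversion performed above is

     before:  if (cc) goto L1    ;; DELAY_INSN, with filled slots
              goto L2            ;; NEXT, no delay slots
            L1: ...

     after:   if (!cc) goto L2   ;; slots kept, INSN_FROM_TARGET_P flipped
            L1: ...

   The INSN_FROM_TARGET_P bits must be inverted because "came from the
   branch target" changes meaning once the condition is reversed.  */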
3521 /* If we own the thread opposite the way this insn branches, see if we
3522 can merge its delay slots with following insns. */
3523 if (INSN_FROM_TARGET_P (pat->insn (1))
3524 && own_thread_p (NEXT_INSN (insn), 0, 1))
3525 try_merge_delay_insns (insn, next);
3526 else if (! INSN_FROM_TARGET_P (pat->insn (1))
3527 && own_thread_p (target_label, target_label, 0))
3528 try_merge_delay_insns (insn, next_active_insn (target_label));
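/* try_merge_delay_insns essentially looks on the thread we own (the
   fall-through path when the slots were filled from the branch target,
   otherwise the code at the target) for insns that duplicate the delay
   slot insns, and can delete the redundant copies.  */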
3530 /* If we get here, we haven't deleted INSN. But we may have deleted
3531 NEXT, so recompute it. */
3532 next = next_active_insn (insn);
3537 /* Look for filled jumps to the end of function label. We can try to convert
3538 them into RETURN insns if the insns in the delay slot are valid for the
3539 RETURN as well. */
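/* Roughly, assuming the target provides a return pattern, the intent is

     before:  (sequence [ (jump L_end) (slot insns ...) ])
              ...
            L_end:  return

     after:   (sequence [ (return) (slot insns ...) ])

   i.e. a filled jump to the end-of-function label becomes a filled
   RETURN, provided each slot insn is also valid in a RETURN's delay
   slot.  */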
3541 static void
3542 make_return_insns (rtx_insn *first)
3544 rtx_insn *insn;
3545 rtx_insn *jump_insn;
3546 rtx real_return_label = function_return_label;
3547 rtx real_simple_return_label = function_simple_return_label;
3548 int slots, i;
3550 /* See if there is a RETURN insn in the function other than the one we
3551 made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
3552 into a RETURN to jump to it. */
3553 for (insn = first; insn; insn = NEXT_INSN (insn))
3554 if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3556 rtx t = get_label_before (insn, NULL_RTX);
3557 if (PATTERN (insn) == ret_rtx)
3558 real_return_label = t;
3559 else
3560 real_simple_return_label = t;
3561 break;
3564 /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3565 was equal to END_OF_FUNCTION_LABEL. */
3566 if (real_return_label)
3567 LABEL_NUSES (real_return_label)++;
3568 if (real_simple_return_label)
3569 LABEL_NUSES (real_simple_return_label)++;
3571 /* Clear the list of insns to fill so we can use it. */
3572 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3574 for (insn = first; insn; insn = NEXT_INSN (insn))
3576 int flags;
3577 rtx kind, real_label;
3579 /* Only look at filled JUMP_INSNs that go to the end-of-function
3580 label. */
3581 if (!NONJUMP_INSN_P (insn)
3582 || GET_CODE (PATTERN (insn)) != SEQUENCE
3583 || !jump_to_label_p (XVECEXP (PATTERN (insn), 0, 0)))
3584 continue;
3586 if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) == function_return_label)
3588 kind = ret_rtx;
3589 real_label = real_return_label;
3591 else if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0))
3592 == function_simple_return_label)
3594 kind = simple_return_rtx;
3595 real_label = real_simple_return_label;
3597 else
3598 continue;
3600 rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3601 jump_insn = pat->insn (0);
3603 /* If we can't make the jump into a RETURN, try to redirect it to the best
3604 RETURN and go on to the next insn. */
3605 if (!reorg_redirect_jump (jump_insn, kind))
3607 /* Make sure redirecting the jump will not invalidate the delay
3608 slot insns. */
3609 if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3610 reorg_redirect_jump (jump_insn, real_label);
3611 continue;
3614 /* See if this RETURN can accept the insns currently in its delay slots.
3615 It can if it has at least as many slots and the contents of each
3616 are valid. */
3618 flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3619 slots = num_delay_slots (jump_insn);
3620 if (slots >= XVECLEN (pat, 0) - 1)
3622 for (i = 1; i < XVECLEN (pat, 0); i++)
3623 if (! (
3624 #ifdef ANNUL_IFFALSE_SLOTS
3625 (INSN_ANNULLED_BRANCH_P (jump_insn)
3626 && INSN_FROM_TARGET_P (pat->insn (i)))
3627 ? eligible_for_annul_false (jump_insn, i - 1,
3628 pat->insn (i), flags) :
3629 #endif
3630 #ifdef ANNUL_IFTRUE_SLOTS
3631 (INSN_ANNULLED_BRANCH_P (jump_insn)
3632 && ! INSN_FROM_TARGET_P (pat->insn (i)))
3633 ? eligible_for_annul_true (jump_insn, i - 1,
3634 pat->insn (i), flags) :
3635 #endif
3636 eligible_for_delay (jump_insn, i - 1,
3637 pat->insn (i), flags)))
3638 break;
3640 else
3641 i = 0;
3643 if (i == XVECLEN (pat, 0))
3644 continue;
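/* The check just above: reaching the end of the eligibility loop with
   I == XVECLEN (pat, 0) means every slot insn remains valid now that
   the jump has become a RETURN, so the filled sequence is kept as-is
   and we move on.  */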
3646 /* We have to do something with this insn. If it is an unconditional
3647 RETURN, delete the SEQUENCE and output the individual insns,
3648 followed by the RETURN. Then set things up so we try to find
3649 insns for its delay slots, if it needs some. */
3650 if (ANY_RETURN_P (PATTERN (jump_insn)))
3652 rtx_insn *prev = PREV_INSN (insn);
3654 delete_related_insns (insn);
3655 for (i = 1; i < XVECLEN (pat, 0); i++)
3656 prev = emit_insn_after (PATTERN (XVECEXP (pat, 0, i)), prev);
3658 insn = emit_jump_insn_after (PATTERN (jump_insn), prev);
3659 emit_barrier_after (insn);
3661 if (slots)
3662 obstack_ptr_grow (&unfilled_slots_obstack, insn);
3664 else
3665 /* It is probably more efficient to keep this with its current
3666 delay slot as a branch to a RETURN. */
3667 reorg_redirect_jump (jump_insn, real_label);
3670 /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
3671 new delay slots we have created. */
3672 if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3673 delete_related_insns (real_return_label);
3674 if (real_simple_return_label != NULL_RTX
3675 && --LABEL_NUSES (real_simple_return_label) == 0)
3676 delete_related_insns (real_simple_return_label);
3678 fill_simple_delay_slots (1);
3679 fill_simple_delay_slots (0);
3682 /* Try to find insns to place in delay slots. */
3684 static void
3685 dbr_schedule (rtx_insn *first)
3687 rtx_insn *insn, *next, *epilogue_insn = 0;
3688 int i;
3689 bool need_return_insns;
3691 /* If the current function has no insns other than the prologue and
3692 epilogue, then do not try to fill any delay slots. */
3693 if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3694 return;
3696 /* Find the highest INSN_UID, then allocate and initialize our map from
3697 INSN_UIDs to positions in the code. */
3698 for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3700 if (INSN_UID (insn) > max_uid)
3701 max_uid = INSN_UID (insn);
3702 if (NOTE_P (insn)
3703 && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3704 epilogue_insn = insn;
3707 uid_to_ruid = XNEWVEC (int, max_uid + 1);
3708 for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3709 uid_to_ruid[INSN_UID (insn)] = i;
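/* uid_to_ruid gives each insn's ordinal position in the chain, so that
   routines such as get_jump_flags can tell whether a branch is forward
   or backward by comparing two array entries instead of walking the
   insn list.  */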
3711 /* Initialize the list of insns that need filling. */
3712 if (unfilled_firstobj == 0)
3714 gcc_obstack_init (&unfilled_slots_obstack);
3715 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3718 for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3720 rtx target;
3722 /* Skip vector tables. We can't get attributes for them. */
3723 if (JUMP_TABLE_DATA_P (insn))
3724 continue;
3726 if (JUMP_P (insn))
3727 INSN_ANNULLED_BRANCH_P (insn) = 0;
3728 INSN_FROM_TARGET_P (insn) = 0;
3730 if (num_delay_slots (insn) > 0)
3731 obstack_ptr_grow (&unfilled_slots_obstack, insn);
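/* The delay slot counts and the eligible_for_* tests used by this pass
   come from the target's define_delay constructs in its machine
   description; a schematic example (the attribute names here are only
   illustrative) looks like

     (define_delay (eq_attr "type" "branch")
       [(eq_attr "in_delay_slot" "yes") (nil) (nil)])

   where each triple gives the condition for filling a slot and the
   conditions under which that slot may be annulled when the branch is
   true or false.  */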
3733 /* Ensure all jumps go to the last of a set of consecutive labels. */
3734 if (JUMP_P (insn)
3735 && (condjump_p (insn) || condjump_in_parallel_p (insn))
3736 && !ANY_RETURN_P (JUMP_LABEL (insn))
3737 && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3738 != JUMP_LABEL (insn)))
3739 redirect_jump (insn, target, 1);
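/* Canonicalizing each jump to the last of a run of consecutive labels
   means later comparisons of branch targets (for instance against
   next_active_insn results in relax_delay_slots) see one label rather
   than several aliases for the same place.  */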
3742 init_resource_info (epilogue_insn);
3744 /* Show we haven't computed an end-of-function label yet. */
3745 function_return_label = function_simple_return_label = NULL;
3747 /* Initialize the statistics for this function. */
3748 memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3749 memset (num_filled_delays, 0, sizeof num_filled_delays);
3751 /* Now do the delay slot filling. Try everything twice in case earlier
3752 changes make more slots fillable. */
3754 for (reorg_pass_number = 0;
3755 reorg_pass_number < MAX_REORG_PASSES;
3756 reorg_pass_number++)
3758 fill_simple_delay_slots (1);
3759 fill_simple_delay_slots (0);
3760 fill_eager_delay_slots ();
3761 relax_delay_slots (first);
3764 /* If we made an end of function label, indicate that it is now
3765 safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3766 If it is now unused, delete it. */
3767 if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3768 delete_related_insns (function_return_label);
3769 if (function_simple_return_label
3770 && --LABEL_NUSES (function_simple_return_label) == 0)
3771 delete_related_insns (function_simple_return_label);
3773 need_return_insns = false;
3774 #ifdef HAVE_return
3775 need_return_insns |= HAVE_return && function_return_label != 0;
3776 #endif
3777 #ifdef HAVE_simple_return
3778 need_return_insns |= HAVE_simple_return && function_simple_return_label != 0;
3779 #endif
3780 if (need_return_insns)
3781 make_return_insns (first);
3783 /* Delete any USE insns made by update_block; subsequent passes don't need
3784 them or know how to deal with them. */
3785 for (insn = first; insn; insn = next)
3787 next = NEXT_INSN (insn);
3789 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3790 && INSN_P (XEXP (PATTERN (insn), 0)))
3791 next = delete_related_insns (insn);
3794 obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3796 /* It is not clear why the line below is needed, but it does seem to be. */
3797 unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3799 if (dump_file)
3801 int i, j, need_comma;
3802 int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3803 int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3805 for (reorg_pass_number = 0;
3806 reorg_pass_number < MAX_REORG_PASSES;
3807 reorg_pass_number++)
3809 fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3810 for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3812 need_comma = 0;
3813 fprintf (dump_file, ";; Reorg function #%d\n", i);
3815 fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3816 num_insns_needing_delays[i][reorg_pass_number]);
3818 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3819 if (num_filled_delays[i][j][reorg_pass_number])
3821 if (need_comma)
3822 fprintf (dump_file, ", ");
3823 need_comma = 1;
3824 fprintf (dump_file, "%d got %d delays",
3825 num_filled_delays[i][j][reorg_pass_number], j);
3827 fprintf (dump_file, "\n");
3830 memset (total_delay_slots, 0, sizeof total_delay_slots);
3831 memset (total_annul_slots, 0, sizeof total_annul_slots);
3832 for (insn = first; insn; insn = NEXT_INSN (insn))
3834 if (! INSN_DELETED_P (insn)
3835 && NONJUMP_INSN_P (insn)
3836 && GET_CODE (PATTERN (insn)) != USE
3837 && GET_CODE (PATTERN (insn)) != CLOBBER)
3839 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3841 rtx control;
3842 j = XVECLEN (PATTERN (insn), 0) - 1;
3843 if (j > MAX_DELAY_HISTOGRAM)
3844 j = MAX_DELAY_HISTOGRAM;
3845 control = XVECEXP (PATTERN (insn), 0, 0);
3846 if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3847 total_annul_slots[j]++;
3848 else
3849 total_delay_slots[j]++;
3851 else if (num_delay_slots (insn) > 0)
3852 total_delay_slots[0]++;
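/* An insn that wants delay slots but was never wrapped in a SEQUENCE
   ended up with none filled, so it is counted in histogram bucket 0.  */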
3855 fprintf (dump_file, ";; Reorg totals: ");
3856 need_comma = 0;
3857 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3859 if (total_delay_slots[j])
3861 if (need_comma)
3862 fprintf (dump_file, ", ");
3863 need_comma = 1;
3864 fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3867 fprintf (dump_file, "\n");
3868 #if defined (ANNUL_IFTRUE_SLOTS) || defined (ANNUL_IFFALSE_SLOTS)
3869 fprintf (dump_file, ";; Reorg annuls: ");
3870 need_comma = 0;
3871 for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3873 if (total_annul_slots[j])
3875 if (need_comma)
3876 fprintf (dump_file, ", ");
3877 need_comma = 1;
3878 fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3881 fprintf (dump_file, "\n");
3882 #endif
3883 fprintf (dump_file, "\n");
3886 if (!sibling_labels.is_empty ())
3888 update_alignments (sibling_labels);
3889 sibling_labels.release ();
3892 free_resource_info ();
3893 free (uid_to_ruid);
3894 crtl->dbr_scheduled_p = true;
3896 #endif /* DELAY_SLOTS */
3898 /* Run delay slot optimization. */
3899 static unsigned int
3900 rest_of_handle_delay_slots (void)
3902 #ifdef DELAY_SLOTS
3903 dbr_schedule (get_insns ());
3904 #endif
3905 return 0;
3908 namespace {
3910 const pass_data pass_data_delay_slots =
3912 RTL_PASS, /* type */
3913 "dbr", /* name */
3914 OPTGROUP_NONE, /* optinfo_flags */
3915 TV_DBR_SCHED, /* tv_id */
3916 0, /* properties_required */
3917 0, /* properties_provided */
3918 0, /* properties_destroyed */
3919 0, /* todo_flags_start */
3920 0, /* todo_flags_finish */
3923 class pass_delay_slots : public rtl_opt_pass
3925 public:
3926 pass_delay_slots (gcc::context *ctxt)
3927 : rtl_opt_pass (pass_data_delay_slots, ctxt)
3930 /* opt_pass methods: */
3931 virtual bool gate (function *);
3932 virtual unsigned int execute (function *)
3934 return rest_of_handle_delay_slots ();
3937 }; // class pass_delay_slots
3939 bool
3940 pass_delay_slots::gate (function *)
3942 #ifdef DELAY_SLOTS
3943 /* At -O0 dataflow info isn't updated after RA. */
3944 return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3945 #else
3946 return 0;
3947 #endif
3950 } // anon namespace
3952 rtl_opt_pass *
3953 make_pass_delay_slots (gcc::context *ctxt)
3955 return new pass_delay_slots (ctxt);
3958 /* Machine dependent reorg pass. */
3960 namespace {
3962 const pass_data pass_data_machine_reorg =
3964 RTL_PASS, /* type */
3965 "mach", /* name */
3966 OPTGROUP_NONE, /* optinfo_flags */
3967 TV_MACH_DEP, /* tv_id */
3968 0, /* properties_required */
3969 0, /* properties_provided */
3970 0, /* properties_destroyed */
3971 0, /* todo_flags_start */
3972 0, /* todo_flags_finish */
3975 class pass_machine_reorg : public rtl_opt_pass
3977 public:
3978 pass_machine_reorg (gcc::context *ctxt)
3979 : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3982 /* opt_pass methods: */
3983 virtual bool gate (function *)
3985 return targetm.machine_dependent_reorg != 0;
3988 virtual unsigned int execute (function *)
3990 targetm.machine_dependent_reorg ();
3991 return 0;
3994 }; // class pass_machine_reorg
3996 } // anon namespace
3998 rtl_opt_pass *
3999 make_pass_machine_reorg (gcc::context *ctxt)
4001 return new pass_machine_reorg (ctxt);