4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
30 * When the operating system detects that it is in an invalid state, a panic
31 * is initiated in order to minimize potential damage to user data and to
32 * facilitate debugging. There are three major tasks to be performed in
33 * a system panic: recording information about the panic in memory (and thus
34 * making it part of the crash dump), synchronizing the file systems to
35 * preserve user file data, and generating the crash dump. We define the
36 * system to be in one of four states with respect to the panic code:
38 * CALM - the state of the system prior to any thread initiating a panic
40 * QUIESCE - the state of the system when the first thread to initiate
41 * a system panic records information about the cause of the panic
42 * and renders the system quiescent by stopping other processors
44 * SYNC - the state of the system when we synchronize the file systems
45 * DUMP - the state when we generate the crash dump.
47 * The transitions between these states are irreversible: once we begin
48 * panicking, we only make one attempt to perform the actions associated with
49 * each state.
51 * The panic code itself must be re-entrant because actions taken during any
52 * state may lead to another system panic. Additionally, any Solaris
53 * thread may initiate a panic at any time, and so we must have synchronization
54 * between threads which attempt to initiate a state transition simultaneously.
55 * The panic code makes use of a special locking primitive, a trigger, to
56 * perform this synchronization. A trigger is simply a word which is set
57 * atomically and can only be set once. We declare three triggers, one for
58 * each transition between the four states. When a thread enters the panic
59 * code it attempts to set each trigger; if it fails it moves on to the
60 * next trigger. A special case is the first trigger: if two threads race
61 * to perform the transition to QUIESCE, the losing thread may execute before
62 * the winner has a chance to stop its CPU. To solve this problem, we have
63 * the loser look ahead to see if any other triggers are set; if not, it
64 * presumes a panic is underway and simply spins. Unfortunately, since we
65 * are panicking, it is not possible to know this with absolute certainty.
67 * There are two common reasons for re-entering the panic code once a panic
68 * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
69 * the operator may type "sync" instead of "go", and the PROM's sync callback
70 * routine will invoke panic(); (2) if the clock routine decides that sync
71 * or dump is not making progress, it will invoke panic() to force a timeout.
72 * The design assumes that a third possibility, another thread causing an
73 * unrelated panic while sync or dump is still underway, is extremely unlikely.
74 * If this situation occurs, we may end up triggering dump while sync is
75 * still in progress. This third case is considered extremely unlikely because
76 * all other CPUs are stopped and low-level interrupts have been blocked.
78 * The panic code is entered via a call directly to the vpanic() function,
79 * or its varargs wrappers panic() and cmn_err(9F). The vpanic routine
80 * is implemented in assembly language to record the current machine
81 * registers, attempt to set the trigger for the QUIESCE state, and
82 * if successful, switch stacks on to the panic_stack before calling into
83 * the common panicsys() routine. The first thread to initiate a panic
84 * is allowed to make use of the reserved panic_stack so that executing
85 * the panic code itself does not overwrite valuable data on that thread's
86 * stack *ahead* of the current stack pointer. This data will be preserved
87 * in the crash dump and may prove invaluable in determining what this
88 * thread has previously been doing. The first thread, saved in panic_thread,
89 * is also responsible for stopping the other CPUs as quickly as possible,
90 * and then setting the various panic_* variables. Most important among
91 * these is panicstr, which allows threads to subsequently bypass held
92 * locks so that we can proceed without ever blocking. We must stop the
93 * other CPUs *prior* to setting panicstr in case threads running there are
94 * currently spinning to acquire a lock; we want that state to be preserved.
95 * Every thread which initiates a panic has its T_PANIC flag set so we can
96 * identify all such threads in the crash dump.
98 * The panic_thread is also allowed to make use of the special memory buffer
99 * panicbuf, which on machines with appropriate hardware is preserved across
100 * reboots. We allow the panic_thread to store its register set and panic
101 * message in this buffer, so even if we fail to obtain a crash dump we will
102 * be able to examine the machine after reboot and determine some of the
103 * state at the time of the panic. If we do get a dump, the panic buffer
104 * data is structured so that a debugger can easily consume the information
105 * therein (see <sys/panic.h>).
107 * Each platform or architecture is required to implement the functions
108 * panic_savetrap() to record trap-specific information to panicbuf,
109 * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
110 * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
111 * miscellaneous platform-specific tasks *after* panicstr is set,
112 * panic_showtrap() to print trap-specific information to the console,
113 * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
115 * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
117 * Words ending in -c interpose k before suffixes which otherwise would
118 * indicate a soft c, and thus the verb and adjective forms of 'panic' are
119 * spelled "panicked", "panicking", and "panicky" respectively. Use of
120 * the ill-conceived "panicing" and "panic'd" is discouraged.
123 #include <sys/types.h>
124 #include <sys/varargs.h>
125 #include <sys/sysmacros.h>
126 #include <sys/cmn_err.h>
127 #include <sys/cpuvar.h>
128 #include <sys/thread.h>
129 #include <sys/t_lock.h>
130 #include <sys/cred.h>
131 #include <sys/systm.h>
132 #include <sys/archsystm.h>
133 #include <sys/uadmin.h>
134 #include <sys/callb.h>
137 #include <sys/disp.h>
138 #include <sys/param.h>
139 #include <sys/dumphdr.h>
140 #include <sys/ftrace.h>
141 #include <sys/reboot.h>
142 #include <sys/debug.h>
143 #include <sys/stack.h>
145 #include <sys/errorq.h>
146 #include <sys/panic.h>
147 #include <sys/fm/util.h>
148 #include <sys/clock_impl.h>
151 * Panic variables which are set once during the QUIESCE state by the
152 * first thread to initiate a panic. These are examined by post-mortem
153 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
154 * the variable naming is historical and allows legacy tools to work.
156 #pragma align STACK_ALIGN(panic_stack)
157 char panic_stack
[PANICSTKSIZE
]; /* reserved stack for panic_thread */
158 kthread_t
*panic_thread
; /* first thread to call panicsys() */
159 cpu_t panic_cpu
; /* cpu from first call to panicsys() */
160 label_t panic_regs
; /* setjmp label from panic_thread */
161 label_t panic_pcb
; /* t_pcb at time of panic */
162 struct regs
*panic_reg
; /* regs struct from first panicsys() */
163 char *volatile panicstr
; /* format string to first panicsys() */
164 va_list panicargs
; /* arguments to first panicsys() */
165 clock_t panic_lbolt
; /* lbolt at time of panic */
166 int64_t panic_lbolt64
; /* lbolt64 at time of panic */
167 hrtime_t panic_hrtime
; /* hrtime at time of panic */
168 timespec_t panic_hrestime
; /* hrestime at time of panic */
169 int panic_ipl
; /* ipl on panic_cpu at time of panic */
170 ushort_t panic_schedflag
; /* t_schedflag for panic_thread */
171 cpu_t
*panic_bound_cpu
; /* t_bound_cpu for panic_thread */
172 char panic_preempt
; /* t_preempt for panic_thread */
175 * Panic variables which can be set via /etc/system or patched while
176 * the system is in operation. Again, the stupid names are historic.
178 char *panic_bootstr
= NULL
; /* mdboot string to use after panic */
179 int panic_bootfcn
= AD_BOOT
; /* mdboot function to use after panic */
180 int halt_on_panic
= 0; /* halt after dump instead of reboot? */
181 int nopanicdebug
= 0; /* reboot instead of call debugger? */
182 int in_sync
= 0; /* skip vfs_syncall() and just dump? */
185 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
186 * to use polled mode instead of interrupt-driven i/o.
188 int do_polled_io
= 0;
191 * The panic_forced flag is set by the uadmin A_DUMP code to inform the
192 * panic subsystem that it should not attempt an initial debug_enter.
194 int panic_forced
= 0;
197 * Triggers for panic state transitions:
199 int panic_quiesce
; /* trigger for CALM -> QUIESCE */
200 int panic_sync
; /* trigger for QUIESCE -> SYNC */
201 int panic_dump
; /* trigger for SYNC -> DUMP */
204 * Variable signifying quiesce(9E) is in progress.
206 volatile int quiesce_active
= 0;
/*
 * panicsys() - common panic handling routine.
 *
 * NOTE(review): this listing is missing a number of source lines (for
 * example the declarations of cp, s and intr_actv, several else branches
 * and the closing brace), so the summary below covers only the statements
 * that are visible here.  Confirm against the complete source.
 *
 * Parameters:
 *	format, alist	printf-style panic message and its argument list
 *	rp		register set handed to panic_saveregs()
 *	on_panic_stack	non-zero selects the one-time recording path in
 *			step 3; presumably passed by vpanic() for the thread
 *			that set the QUIESCE trigger -- TODO confirm
 *
 * Visible flow:
 *  1. Snapshot t_schedflag, t_bound_cpu, t_preempt and t_pcb of the current
 *     thread, setjmp() into t_pcb, then set T_PANIC in t_flag and
 *     TS_DONT_SWAP in t_schedflag.
 *  2. If the CPU is on the interrupt stack and has a spare interrupt
 *     thread, remember the old cpu_intr_stack and cpu_intr_actv, switch the
 *     CPU to the stack of that interrupt thread, and clear the bits of
 *     cpu_intr_actv above LOCK_LEVEL.
 *  3. If on_panic_stack is set: fill in the panicbuf header (version,
 *     message offset, uuid), save trap and register data, format the
 *     message into panicbuf, call panic_stopcpus(), and only afterwards set
 *     panicstr and the other panic_* globals (using the snapshots from
 *     step 1).  Then lower ipl to CLOCK_LEVEL, run panic_quiesce_hw(),
 *     FTRACE_STOP() and the CB_CL_PANIC callbacks, flush log_intrq and
 *     print log_consq, print the panic banner and message, show trap
 *     information, and call debug_enter() when RB_DEBUG or obpdebug is set
 *     and neither nopanicdebug nor panic_forced is set.
 *     Otherwise, if panic_dump, panic_sync or panicstr is already set,
 *     only the banner and message are printed.
 *  4. When in_sync is clear and panic_trigger(&panic_sync) succeeds, the
 *     sync step runs (its body is only partly visible here).
 *  5. When panic_trigger(&panic_dump) succeeds, the dump step runs;
 *     otherwise debug_enter() is called if a debugger is configured and
 *     nopanicdebug is clear.  A "dump aborted" message and two mdboot()
 *     calls (AD_HALT, or panic_bootfcn with panic_bootstr) follow; the
 *     conditions selecting between them are not visible in this listing.
 *  6. Finally ipl is restored to at most CLOCK_LEVEL.
 */
209 panicsys(const char *format
, va_list alist
, struct regs
*rp
, int on_panic_stack
)
212 kthread_t
*t
= curthread
;
215 caddr_t intr_stack
= NULL
;
218 ushort_t schedflag
= t
->t_schedflag
;
219 cpu_t
*bound_cpu
= t
->t_bound_cpu
;
220 char preempt
= t
->t_preempt
;
221 label_t pcb
= t
->t_pcb
;
223 (void) setjmp(&t
->t_pcb
);
224 t
->t_flag
|= T_PANIC
;
226 t
->t_schedflag
|= TS_DONT_SWAP
;
233 * If we're on the interrupt stack and an interrupt thread is available
234 * in this CPU's pool, preserve the interrupt stack by detaching an
235 * interrupt thread and making its stack the intr_stack.
237 if (CPU_ON_INTR(cp
) && cp
->cpu_intr_thread
!= NULL
) {
238 kthread_t
*it
= cp
->cpu_intr_thread
;
240 intr_stack
= cp
->cpu_intr_stack
;
241 intr_actv
= cp
->cpu_intr_actv
;
243 cp
->cpu_intr_stack
= thread_stk_init(it
->t_stk
);
244 cp
->cpu_intr_thread
= it
->t_link
;
247 * Clear only the high level bits of cpu_intr_actv.
248 * We want to indicate that high-level interrupts are
249 * not active without destroying the low-level interrupt
250 * information stored there.
252 cp
->cpu_intr_actv
&= ((1 << (LOCK_LEVEL
+ 1)) - 1);
256 * Record one-time panic information and quiesce the other CPUs.
257 * Then print out the panic message and stack trace.
259 if (on_panic_stack
) {
260 panic_data_t
*pdp
= (panic_data_t
*)panicbuf
;
262 pdp
->pd_version
= PANICBUFVERS
;
263 pdp
->pd_msgoff
= sizeof (panic_data_t
) - sizeof (panic_nv_t
);
265 (void) strncpy(pdp
->pd_uuid
, dump_get_uuid(),
266 sizeof (pdp
->pd_uuid
));
268 if (t
->t_panic_trap
!= NULL
)
269 panic_savetrap(pdp
, t
->t_panic_trap
);
271 panic_saveregs(pdp
, rp
);
273 (void) vsnprintf(&panicbuf
[pdp
->pd_msgoff
],
274 PANICBUFSIZE
- pdp
->pd_msgoff
, format
, alist
);
277 * Call into the platform code to stop the other CPUs.
278 * We currently have all interrupts blocked, and expect that
279 * the platform code will lower ipl only as far as needed to
280 * perform cross-calls, and will acquire as *few* locks as is
281 * possible -- panicstr is not set so we can still deadlock.
283 panic_stopcpus(cp
, t
, s
);
285 panicstr
= (char *)format
;
286 va_copy(panicargs
, alist
);
287 panic_lbolt
= LBOLT_NO_ACCOUNT
;
288 panic_lbolt64
= LBOLT_NO_ACCOUNT64
;
289 panic_hrestime
= hrestime
;
290 panic_hrtime
= gethrtime_waitfree();
292 panic_regs
= t
->t_pcb
;
295 panic_ipl
= spltoipl(s
);
296 panic_schedflag
= schedflag
;
297 panic_bound_cpu
= bound_cpu
;
298 panic_preempt
= preempt
;
301 if (intr_stack
!= NULL
) {
302 panic_cpu
.cpu_intr_stack
= intr_stack
;
303 panic_cpu
.cpu_intr_actv
= intr_actv
;
307 * Lower ipl to 10 to keep clock() from running, but allow
308 * keyboard interrupts to enter the debugger. These callbacks
309 * are executed with panicstr set so they can bypass locks.
311 splx(ipltospl(CLOCK_LEVEL
));
312 panic_quiesce_hw(pdp
);
313 (void) FTRACE_STOP();
314 (void) callb_execute_class(CB_CL_PANIC
, NULL
);
316 if (log_intrq
!= NULL
)
317 log_flushq(log_intrq
);
320 * If log_consq has been initialized and syslogd has started,
321 * print any messages in log_consq that haven't been consumed.
323 if (log_consq
!= NULL
&& log_consq
!= log_backlogq
)
324 log_printq(log_consq
);
330 * A hypervisor panic originates outside of Solaris, so we
331 * don't want to prepend the panic message with misleading
332 * pointers from within Solaris.
336 printf("\n\rpanic[cpu%d]/thread=%p: ", cp
->cpu_id
,
338 vprintf(format
, alist
);
341 if (t
->t_panic_trap
!= NULL
) {
342 panic_showtrap(t
->t_panic_trap
);
349 if (((boothowto
& RB_DEBUG
) || obpdebug
) &&
350 !nopanicdebug
&& !panic_forced
) {
351 if (dumpvp
!= NULL
) {
352 debug_enter("panic: entering debugger "
353 "(continue to save dump)");
355 debug_enter("panic: entering debugger "
356 "(no dump device, continue to reboot)");
360 } else if (panic_dump
!= 0 || panic_sync
!= 0 || panicstr
!= NULL
) {
361 printf("\n\rpanic[cpu%d]/thread=%p: ", cp
->cpu_id
, (void *)t
);
362 vprintf(format
, alist
);
368 * Prior to performing sync or dump, we make sure that do_polled_io is
369 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
370 * will re-enter panic if we are not making progress with sync or dump.
374 * Sync the filesystems. Reset t_cred if not set because much of
375 * the filesystem code depends on CRED() being valid.
377 if (!in_sync
&& panic_trigger(&panic_sync
)) {
378 if (t
->t_cred
== NULL
)
380 splx(ipltospl(CLOCK_LEVEL
));
386 * Take the crash dump. If the dump trigger is already set, try to
387 * enter the debugger again before rebooting the system.
389 if (panic_trigger(&panic_dump
)) {
391 splx(ipltospl(CLOCK_LEVEL
));
395 } else if (((boothowto
& RB_DEBUG
) || obpdebug
) && !nopanicdebug
) {
396 debug_enter("panic: entering debugger (continue to reboot)");
398 printf("dump aborted: please record the above information!\n");
401 mdboot(A_REBOOT
, AD_HALT
, NULL
, B_FALSE
);
403 mdboot(A_REBOOT
, panic_bootfcn
, panic_bootstr
, B_FALSE
);
406 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
407 * and unable to jump into the debugger.
409 splx(MIN(s
, ipltospl(CLOCK_LEVEL
)));
/*
 * panic() - varargs entry point into the panic code.  Starts a va_list on
 * the arguments that follow format and hands format and that list to
 * vpanic().
 *
 * NOTE(review): only the signature, va_start() and the vpanic() call are
 * visible in this listing; the remainder of the definition is not shown.
 */
415 panic(const char *format
, ...)
419 va_start(alist
, format
);
420 vpanic(format
, alist
);