/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Kernel Error Queues
 *
 * A common problem when handling hardware error traps and interrupts is that
 * these errors frequently must be handled at high interrupt level, where
 * reliably producing error messages and safely examining and manipulating
 * other kernel state may not be possible.  The kernel error queue primitive is
 * a common set of routines that allow a subsystem to maintain a queue of
 * errors that can be processed by an explicit call from a safe context or by a
 * soft interrupt that fires at a specific lower interrupt level.  The queue
 * management code also ensures that if the system panics, all in-transit
 * errors are logged prior to reset.  Each queue has an associated kstat for
 * observing the number of errors dispatched and logged, and mdb(1) debugging
 * support is provided for live and post-mortem observability.
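 *
 * For example, the per-queue statistics can be read from userland with
 * kstat(1M), since each queue's kstat is created in module "unix" with
 * class "errorq", and the queues themselves can be inspected with the
 * ::errorq dcmd in mdb(1) (sketch):
 *
 *	# kstat -c errorq
 *	# mdb -k
 *	> ::errorq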
 *
 * Queue Construction
 *
 * All of the queue data structures are allocated in advance as part of
 * the errorq_create() call.  No additional memory allocations are
 * performed as part of errorq_dispatch(), errorq_reserve(),
 * errorq_commit() or errorq_drain().  This design facilitates reliable
 * error queue processing even when the system is low on memory, and
 * ensures that errorq_dispatch() can be called from any context.  When
 * the queue is created, the maximum queue length is specified as a
 * parameter to errorq_create() and errorq_nvcreate().  This length
 * should represent a reasonable upper bound on the number of
 * simultaneous errors.  If errorq_dispatch() or errorq_reserve() is
 * invoked and no free queue elements are available, the error is
 * dropped and will not be logged.  Typically, the queue will only be
 * exhausted by an error storm, and in this case the earlier errors
 * provide the most important data for analysis.  When a new error is
 * dispatched, the error data is copied into the preallocated queue
 * element so that the caller's buffer can be reused.
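 *
 * As an illustrative sketch (the queue name, length, element type, IPL,
 * and handler below are hypothetical and not part of this file), a driver
 * that detects correctable errors at high interrupt level might use:
 *
 *	typedef struct my_ce_error {
 *		uint64_t mce_afar;
 *		uint64_t mce_afsr;
 *	} my_ce_error_t;
 *
 *	static errorq_t *my_ce_queue;
 *
 *	my_ce_queue = errorq_create("my_ce_queue", my_ce_drain, NULL,
 *	    64, sizeof (my_ce_error_t), 1, ERRORQ_VITAL);
 *
 * and then, from the trap handler itself:
 *
 *	my_ce_error_t mce = { afar, afsr };
 *
 *	errorq_dispatch(my_ce_queue, &mce, sizeof (mce), ERRORQ_ASYNC);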
 *
 * When a new error is reserved, an element is moved from the free pool
 * and returned to the caller.  The element buffer data, eqe_data, may be
 * managed by the caller and dispatched to the errorq by calling
 * errorq_commit().  This is useful for additions to errorqs created with
 * errorq_nvcreate() to handle name-value pair (nvpair) data.  See below
 * for a discussion on nvlist errorqs.
 *
 * Queue Drain Callback
 *
 * When the error queue is drained, the caller's queue drain callback is
 * invoked with a pointer to the saved error data.  This function may be
 * called from passive kernel context or soft interrupt context at or
 * below LOCK_LEVEL, or as part of panic().  As such, the callback should
 * restrict itself to calling cmn_err() (but never with the CE_PANIC flag).
 * The callback must not call panic(), attempt to allocate memory, or wait
 * on a condition variable.  The callback must not call errorq_destroy()
 * or errorq_drain() on the same error queue that called it.
 *
 * The queue drain callback will always be called for each pending error
 * in the order in which errors were enqueued (oldest to newest).  The
 * queue drain callback is guaranteed to provide at *least* once semantics
 * for all errors that are successfully dispatched (i.e. for which
 * errorq_dispatch() has successfully completed).  If an unrelated panic
 * occurs while the queue drain callback is running on a vital queue, the
 * panic subsystem will continue the queue drain and the callback may be
 * invoked again for the same error.  Therefore, the callback should
 * restrict itself to logging messages and taking other actions that are
 * not destructive if repeated.
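 *
 * A conforming drain callback is typically little more than a formatted
 * message, for example (sketch, using the hypothetical my_ce_error_t and
 * handler name from above):
 *
 *	static void
 *	my_ce_drain(void *private, const void *data, const errorq_elem_t *eqep)
 *	{
 *		const my_ce_error_t *mcep = data;
 *
 *		cmn_err(CE_WARN, "corrected error: afar=%llx afsr=%llx",
 *		    (u_longlong_t)mcep->mce_afar, (u_longlong_t)mcep->mce_afsr);
 *	}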
 *
 * Name-Value Pair Error Queues
 *
 * During error handling, it may be more convenient to store error
 * queue element data as a fixed buffer of name-value pairs.  The
 * nvpair library allows construction and destruction of nvlists
 * in pre-allocated memory buffers.
 *
 * Error queues created via errorq_nvcreate() store queue element
 * data as fixed buffer nvlists (ereports).  errorq_reserve()
 * allocates an errorq element from eqp->eq_bitmap and returns a valid
 * pointer to an errorq_elem_t (queue element) and a pre-allocated
 * fixed buffer nvlist.  errorq_elem_nvl() is used to gain access
 * to the nvlist to add name-value ereport members prior to
 * dispatching the error queue element in errorq_commit().
 *
 * Once dispatched, the drain function will return the element to
 * eqp->eq_bitmap and reset the associated nv_alloc structure.
 * errorq_cancel() may be called to cancel a reservation for an element
 * that was never dispatched (committed).  This is useful in cases where
 * a programming error prevents a queue element from being dispatched.
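 *
 * Putting these together, an ereport producer might do the following
 * (sketch; the queue pointer, class string, and member names here are
 * illustrative only):
 *
 *	errorq_elem_t *eqep;
 *	nvlist_t *nvl;
 *
 *	if ((eqep = errorq_reserve(my_nv_queue)) != NULL) {
 *		nvl = errorq_elem_nvl(my_nv_queue, eqep);
 *		(void) nvlist_add_string(nvl, "class", "cpu.example.ce");
 *		(void) nvlist_add_uint64(nvl, "afar", afar);
 *		errorq_commit(my_nv_queue, eqep, ERRORQ_ASYNC);
 *	}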
 *
 * Queue Structure
 *
 * The queue element structures and error data buffers are allocated in
 * two contiguous chunks as part of errorq_create() or errorq_nvcreate().
 * Each queue element structure contains a next pointer, a previous
 * pointer, and a pointer to the corresponding error data buffer.  The
 * data buffer for a nvlist errorq is a shared buffer for the allocation
 * of name-value pair lists.  The elements are kept on one of the lists
 * described below.
 *
 * Unused elements are kept in the free pool, managed by eqp->eq_bitmap.
 * The eqe_prev and eqe_next pointers are not used while in the free pool
 * and will be set to NULL.
 *
 * Pending errors are kept on the pending list, a singly-linked list
 * pointed to by eqp->eq_pend, and linked together using eqe_prev.  This
 * list is maintained in order from newest error to oldest.  The eqe_next
 * pointer is not used by the pending list and will be set to NULL.
 *
 * The processing list is a doubly-linked list pointed to by eqp->eq_phead
 * (the oldest element) and eqp->eq_ptail (the newest element).  The
 * eqe_next pointer is used to traverse from eq_phead to eq_ptail, and the
 * eqe_prev pointer is used to traverse from eq_ptail to eq_phead.  Once a
 * queue drain operation begins, the current pending list is moved to the
 * processing list in a two-phase commit fashion (eq_ptail is set at the
 * beginning of the move and eq_phead only at the end), allowing the panic
 * code to always locate and process all pending errors in the event that
 * a panic occurs in the middle of queue processing.
 *
 * A fourth list is maintained for nvlist errorqs.  The dump list,
 * eq_dump, is used to link all errorq elements that should be stored
 * in a crash dump file in the event of a system panic.  During
 * errorq_panic(), the list is created and subsequently traversed
 * in errorq_dump() during the final phases of a crash dump.
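 *
 * In summary, an element moves through the following states (the free
 * pool is tracked only by eq_bitmap rather than by list pointers):
 *
 *	free pool -> pending list	errorq_dispatch(), or errorq_reserve()
 *					followed by errorq_commit()
 *	pending -> processing list	first phase of errorq_drain()
 *	processing -> free pool		eq_func() callback has logged the error
 *	processing -> dump list		on panic, for ERRORQ_NVLIST queues only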
 *
 * Platform Considerations
 *
 * In order to simplify their implementation, error queues make use of the
 * C wrappers for compare-and-swap.  If the platform itself does not
 * support compare-and-swap in hardware and the kernel emulation routines
 * are used instead, then the context in which errorq_dispatch() can be
 * safely invoked is further constrained by the implementation of the
 * compare-and-swap emulation.  Specifically, if errorq_dispatch() is
 * called from a code path that can be executed above ATOMIC_LEVEL on such
 * a platform, the dispatch code could potentially deadlock unless the
 * corresponding error interrupt is blocked or disabled prior to calling
 * errorq_dispatch().  Error queues should therefore be deployed with
 * caution on these platforms.
 *
 * Interfaces
 *
 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
 *
 *	Create a new error queue with the specified name, callback, and
 *	properties.  A pointer to the new error queue is returned upon
 *	success, or NULL is returned to indicate that the queue could not
 *	be created.  This function must be called from passive kernel
 *	context with no locks held that can prevent a sleeping memory
 *	allocation from occurring.  errorq_create() will return failure if
 *	the queue kstats cannot be created, or if a soft interrupt handler
 *	cannot be registered.
 *
 *	The queue 'name' is a string that is recorded for live and
 *	post-mortem examination by a debugger.  The queue callback 'func'
 *	will be invoked for each error drained from the queue, and will
 *	receive the 'private' pointer as its first argument.  The callback
 *	must obey the rules for callbacks described above.  The queue will
 *	have maximum length 'qlen' and each element will be able to record
 *	up to 'eltsize' bytes of data.  The queue's soft interrupt (see
 *	errorq_dispatch(), below) will fire at 'ipl', which should not
 *	exceed LOCK_LEVEL.  The queue 'flags' may include the following flag:
 *
 *	ERRORQ_VITAL - This queue contains information that is considered
 *	   vital to problem diagnosis.  Error queues that are marked vital
 *	   will be automatically drained by the panic subsystem prior to
 *	   printing the panic messages to the console.
 *
 * void errorq_destroy(errorq);
 *
 *	Destroy the specified error queue.  The queue is drained of any
 *	pending elements and these are logged before errorq_destroy()
 *	returns.  Once errorq_destroy() begins draining the queue, any
 *	simultaneous calls to dispatch errors will result in the errors
 *	being dropped.  The caller must use a higher-level abstraction
 *	(e.g. disabling an error interrupt) to ensure that error handling
 *	code does not attempt to dispatch errors to the queue while it is
 *	being freed.
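 *
 *	For example, a driver's detach path might first quiesce its error
 *	interrupt and only then destroy the queue (sketch; my_ce_disable()
 *	is a hypothetical routine that masks the interrupt source):
 *
 *		my_ce_disable();
 *		errorq_destroy(my_ce_queue);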
 *
 * void errorq_dispatch(errorq, data, len, flag);
 *
 *	Attempt to enqueue the specified error data.  If a free queue
 *	element is available, the data is copied into a free element and
 *	placed on a pending list.  If no free queue element is available,
 *	the error is dropped.  The data length (len) is specified in bytes
 *	and should not exceed the queue's maximum element size.  If the
 *	data length is less than the maximum element size, the remainder
 *	of the queue element is filled with zeroes.  The flag parameter
 *	should be one of:
 *
 *	ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
 *	   IPL to asynchronously drain the queue on behalf of the caller.
 *
 *	ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
 *	   The caller is presumed to be calling errorq_drain() or panic()
 *	   in the near future in order to drain the queue and log the error.
 *
 *	The errorq_dispatch() function may be called from any context,
 *	subject to the Platform Considerations described above.
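 *
 *	For example, a fatal error handler might record the error with
 *	ERRORQ_SYNC and then rely on the panic path to drain the (vital)
 *	queue (sketch, reusing the hypothetical names from above):
 *
 *		errorq_dispatch(my_ce_queue, &mce, sizeof (mce), ERRORQ_SYNC);
 *		cmn_err(CE_PANIC, "unrecoverable memory error");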
 *
 * void errorq_drain(errorq);
 *
 *	Drain the error queue of all pending errors.  The queue's callback
 *	function is invoked for each error in order from oldest to newest.
 *	This function may be used at or below LOCK_LEVEL or from panic
 *	context.
 *
 * errorq_elem_t *errorq_reserve(errorq);
 *
 *	Reserve an error queue element for later processing and
 *	dispatching.  The element is returned to the caller, who may add
 *	error-specific data to the element.  The element is returned to
 *	the free pool either after errorq_commit() is called and the
 *	element has been processed, or immediately when errorq_cancel()
 *	is called.
 *
 * void errorq_commit(errorq, errorq_elem, flag);
 *
 *	Commit an errorq element (eqep) for dispatching; see
 *	errorq_dispatch() above for a description of the flag parameter.
 *
 * void errorq_cancel(errorq, errorq_elem);
 *
 *	Cancel a pending errorq element reservation.  The errorq element is
 *	returned to the free pool upon cancellation.
 */

#include <sys/errorq_impl.h>
#include <sys/sysmacros.h>
#include <sys/machlock.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/spl.h>
#include <sys/dumphdr.h>
#include <sys/compress.h>
#include <sys/time.h>
#include <sys/panic.h>
#include <sys/bitmap.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>

static struct errorq_kstat errorq_kstat_template = {
	{ "dispatched", KSTAT_DATA_UINT64 },
	{ "dropped", KSTAT_DATA_UINT64 },
	{ "logged", KSTAT_DATA_UINT64 },
	{ "reserved", KSTAT_DATA_UINT64 },
	{ "reserve_fail", KSTAT_DATA_UINT64 },
	{ "committed", KSTAT_DATA_UINT64 },
	{ "commit_fail", KSTAT_DATA_UINT64 },
	{ "cancelled", KSTAT_DATA_UINT64 }
};

static uint64_t errorq_lost = 0;
static errorq_t *errorq_list = NULL;
static kmutex_t errorq_lock;
static uint64_t errorq_vitalmin = 5;

static uint_t
errorq_intr(caddr_t eqp)
{
	errorq_drain((errorq_t *)eqp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Create a new error queue with the specified properties and add a software
 * interrupt handler and kstat for it.  This function must be called from
 * passive kernel context with no locks held that can prevent a sleeping
 * memory allocation from occurring.  This function will return NULL if the
 * softint or kstat for this queue cannot be created.
 */
errorq_t *
errorq_create(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
	ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
	dev_info_t *dip = ddi_root_node();
	errorq_elem_t *eep;
	ddi_softintr_t id = NULL;
	caddr_t data;

	ASSERT(qlen != 0 && size != 0);
	ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);

	/*
	 * If a queue is created very early in boot before device tree services
	 * are available, the queue softint handler cannot be created.  We
	 * manually drain these queues and create their softint handlers when
	 * it is safe to do so as part of errorq_init(), below.
	 */
	if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
	    &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "errorq_create: failed to register "
		    "IPL %u softint for queue %s", ipl, name);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	if ((eqp->eq_ksp = kstat_create("unix", 0, name, "errorq",
	    KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
		cmn_err(CE_WARN, "errorq_create: failed to create kstat "
		    "for queue %s", name);
		if (id != NULL)
			ddi_remove_softintr(id);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	bcopy(&errorq_kstat_template, &eqp->eq_kstat,
	    sizeof (struct errorq_kstat));
	eqp->eq_ksp->ks_data = &eqp->eq_kstat;
	eqp->eq_ksp->ks_private = eqp;
	kstat_install(eqp->eq_ksp);

	(void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
	eqp->eq_name[ERRORQ_NAMELEN] = '\0';
	eqp->eq_func = func;
	eqp->eq_private = private;
	eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
	eqp->eq_qlen = qlen;
	eqp->eq_size = size;
	eqp->eq_ipl = ipl;
	eqp->eq_flags = flags | ERRORQ_ACTIVE;
	eqp->eq_id = id;
	mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
	eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
	eqp->eq_phead = NULL;
	eqp->eq_ptail = NULL;
	eqp->eq_pend = NULL;
	eqp->eq_dump = NULL;
	eqp->eq_bitmap = kmem_zalloc(BT_SIZEOFMAP(qlen), KM_SLEEP);
	eqp->eq_rotor = 0;

	/*
	 * Iterate over the array of errorq_elem_t structures and set each
	 * element's data pointer.
	 */
	for (eep = eqp->eq_elems, data = eqp->eq_data; qlen > 1; qlen--) {
		eep->eqe_next = NULL;
		eep->eqe_dump = NULL;
		eep->eqe_prev = NULL;
		eep->eqe_data = data;

		data += size;
		eep++;
	}

	eep->eqe_next = NULL;
	eep->eqe_prev = NULL;
	eep->eqe_data = data;
	eep->eqe_dump = NULL;

	/*
	 * Once the errorq is initialized, add it to the global list of queues,
	 * and then return a pointer to the new queue to the caller.
	 */
	mutex_enter(&errorq_lock);
	eqp->eq_next = errorq_list;
	errorq_list = eqp;
	mutex_exit(&errorq_lock);

	return (eqp);
}

/*
 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
 * flag and initialize each element to have the start of its data region used
 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
 */
errorq_t *
errorq_nvcreate(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp;
	errorq_elem_t *eep;

	eqp = errorq_create(name, func, private, qlen,
	    size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);

	if (eqp == NULL)
		return (NULL);

	mutex_enter(&eqp->eq_lock);

	for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
		errorq_nvelem_t *eqnp = eep->eqe_data;
		eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
		eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
	}

	mutex_exit(&eqp->eq_lock);
	return (eqp);
}

/*
 * To destroy an error queue, we mark it as disabled and then explicitly drain
 * all pending errors.  Once the drain is complete, we can remove the queue
 * from the global list of queues examined by errorq_panic(), and then free
 * the various queue data structures.  The caller must use some higher-level
 * abstraction (e.g. disabling an error interrupt) to ensure that no one will
 * attempt to enqueue new errors while we are freeing this queue.
 */
void
errorq_destroy(errorq_t *eqp)
{
	errorq_t *p, **pp;
	errorq_elem_t *eep;
	ulong_t i;

	ASSERT(eqp != NULL);
	eqp->eq_flags &= ~ERRORQ_ACTIVE;
	errorq_drain(eqp);

	mutex_enter(&errorq_lock);
	pp = &errorq_list;

	for (p = errorq_list; p != NULL; p = p->eq_next) {
		if (p == eqp) {
			*pp = p->eq_next;
			break;
		}
		pp = &p->eq_next;
	}

	mutex_exit(&errorq_lock);

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			fm_nva_xdestroy(eqnp->eqn_nva);
		}
	}

	mutex_destroy(&eqp->eq_lock);
	kstat_delete(eqp->eq_ksp);

	if (eqp->eq_id != NULL)
		ddi_remove_softintr(eqp->eq_id);

	kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
	kmem_free(eqp->eq_bitmap, BT_SIZEOFMAP(eqp->eq_qlen));
	kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);

	kmem_free(eqp, sizeof (errorq_t));
}

/*
 * Private version of bt_availbit which makes a best-effort attempt
 * at allocating in a round-robin fashion in order to facilitate post-mortem
 * debugging.
 */
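/*
 * For example, with nbits = 8 and a bitmap in which bits 0-3 and 5 are
 * already allocated (set), a call with curindex = 5 scans the remaining
 * bits of the current word and returns 6; a following call with
 * curindex = 6 returns 7; and a call with curindex = 7 falls through to
 * scan from the start of the bitmap and returns 4, the last free bit.
 */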
static index_t
errorq_availbit(ulong_t *bitmap, size_t nbits, index_t curindex)
{
	ulong_t bit, maxbit, bx;
	index_t rval, nextindex = curindex + 1;
	index_t nextword = nextindex >> BT_ULSHIFT;
	ulong_t nextbitindex = nextindex & BT_ULMASK;
	index_t maxindex = nbits - 1;
	index_t maxword = maxindex >> BT_ULSHIFT;
	ulong_t maxbitindex = maxindex & BT_ULMASK;

	/*
	 * First check if there are still some bits remaining in the current
	 * word, and see if any of those are available.  We need to do this by
	 * hand as the bt_availbit() function always starts at the beginning
	 * of a word.
	 */
	if (nextindex <= maxindex && nextbitindex != 0) {
		maxbit = (nextword == maxword) ? maxbitindex : BT_ULMASK;
		for (bx = 0, bit = 1; bx <= maxbit; bx++, bit <<= 1)
			if (bx >= nextbitindex && !(bitmap[nextword] & bit))
				return ((nextword << BT_ULSHIFT) + bx);
		nextword++;
	}

	/*
	 * Now check if there are any words remaining before the end of the
	 * bitmap.  Use bt_availbit() to find any free bits.
	 */
	if (nextword <= maxword)
		if ((rval = bt_availbit(&bitmap[nextword],
		    nbits - (nextword << BT_ULSHIFT))) != -1)
			return ((nextword << BT_ULSHIFT) + rval);

	/*
	 * Finally loop back to the start and look for any free bits starting
	 * from the beginning of the bitmap to the current rotor position.
	 */
	return (bt_availbit(bitmap, nextindex));
}

/*
 * Dispatch a new error into the queue for later processing.  The specified
 * data buffer is copied into a preallocated queue element.  If 'len' is
 * smaller than the queue element size, the remainder of the queue element is
 * filled with zeroes.  This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
{
	errorq_elem_t *eep, *old;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_inc_64(&errorq_lost);
		return; /* drop error if queue is uninitialized or disabled */
	}

	for (;;) {
		int i, rval;

		if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
		    eqp->eq_rotor)) == -1) {
			atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
			return;
		}
		BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
		if (rval == 0) {
			eqp->eq_rotor = i;
			eep = &eqp->eq_elems[i];
			break;
		}
	}

	ASSERT(len <= eqp->eq_size);
	bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));

	if (len < eqp->eq_size)
		bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);

	for (;;) {
		old = eqp->eq_pend;
		eep->eqe_prev = old;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, old, eep) == old)
			break;
	}

	atomic_inc_64(&eqp->eq_kstat.eqk_dispatched.value.ui64);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Drain the specified error queue by calling eq_func() for each pending error.
 * This function must be called at or below LOCK_LEVEL or from panic context.
 * In order to synchronize with other attempts to drain the queue, we acquire
 * the adaptive eq_lock, blocking other consumers.  Once this lock is held,
 * we must use compare-and-swap to move the pending list to the processing
 * list and to return elements to the free pool in order to synchronize
 * with producers, who do not acquire any locks and only use atomic set/clear.
 *
 * An additional constraint on this function is that if the system panics
 * while this function is running, the panic code must be able to detect and
 * handle all intermediate states and correctly dequeue all errors.  The
 * errorq_panic() function below will be used for detecting and handling
 * these intermediate states.  The comments in errorq_drain() below explain
 * how we make sure each intermediate state is distinct and consistent.
 */
void
errorq_drain(errorq_t *eqp)
{
	errorq_elem_t *eep, *dep;

	ASSERT(eqp != NULL);
	mutex_enter(&eqp->eq_lock);

	/*
	 * If there are one or more pending errors, set eq_ptail to point to
	 * the first element on the pending list and then attempt to compare-
	 * and-swap NULL to the pending list.  We use membar_producer() to
	 * make sure that eq_ptail will be visible to errorq_panic() below
	 * before the pending list is NULLed out.  This section is labeled
	 * case (1) for errorq_panic, below.  If eq_ptail is not yet set (1A),
	 * eq_pend has all the pending errors.  If atomic_cas_ptr fails or
	 * has not been called yet (1B), eq_pend still has all the pending
	 * errors.  If atomic_cas_ptr succeeds (1C), eq_ptail has all the
	 * pending errors.
	 */
	while ((eep = eqp->eq_pend) != NULL) {
		eqp->eq_ptail = eep;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, eep, NULL) == eep)
			break;
	}

	/*
	 * If no errors were pending, assert that eq_ptail is set to NULL,
	 * drop the consumer lock, and return without doing anything.
	 */
	if (eep == NULL) {
		ASSERT(eqp->eq_ptail == NULL);
		mutex_exit(&eqp->eq_lock);
		return;
	}

	/*
	 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
	 * oldest error, setting the eqe_next pointer so that we can iterate
	 * over the errors from oldest to newest.  We use membar_producer()
	 * to make sure that these stores are visible before we set eq_phead.
	 * If we panic before, during, or just after this loop (case 2),
	 * errorq_panic() will simply redo this work, as described below.
	 */
	for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
		eep->eqe_prev->eqe_next = eep;
	membar_producer();

	/*
	 * Now set eq_phead to the head of the processing list (the oldest
	 * error) and issue another membar_producer() to make sure that
	 * eq_phead is seen as non-NULL before we clear eq_ptail.  If we panic
	 * after eq_phead is set (case 3), we will detect and log these errors
	 * in errorq_panic(), as described below.
	 */
	eqp->eq_phead = eep;
	membar_producer();

	eqp->eq_ptail = NULL;
	membar_producer();

	/*
	 * If we enter from errorq_panic_drain(), we may already have
	 * errorq elements on the dump list.  Find the tail of
	 * the list ready for append.
	 */
	if (panicstr && (dep = eqp->eq_dump) != NULL) {
		while (dep->eqe_dump != NULL)
			dep = dep->eqe_dump;
	}

	/*
	 * Now iterate over the processing list from oldest (eq_phead) to
	 * newest and log each error.  Once an error is logged, we use
	 * atomic clear to return it to the free pool.  If we panic before,
	 * during, or after calling eq_func() (case 4), the error will still be
	 * found on eq_phead and will be logged in errorq_panic below.
	 */
	while ((eep = eqp->eq_phead) != NULL) {
		eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
		eqp->eq_kstat.eqk_logged.value.ui64++;

		eqp->eq_phead = eep->eqe_next;
		membar_producer();

		eep->eqe_next = NULL;

		/*
		 * On panic, we add the element to the dump list for each
		 * nvlist errorq.  Elements are stored oldest to newest.
		 * Then continue, so we don't free and subsequently overwrite
		 * any elements which we've put on the dump queue.
		 */
		if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
			if (eqp->eq_dump == NULL)
				dep = eqp->eq_dump = eep;
			else
				dep = dep->eqe_dump = eep;
			dep->eqe_dump = NULL;
			continue;
		}

		eep->eqe_prev = NULL;
		BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
	}

	mutex_exit(&eqp->eq_lock);
}

/*
 * Now that device tree services are available, set up the soft interrupt
 * handlers for any queues that were created early in boot.  We then
 * manually drain these queues to report any pending early errors.
 */
void
errorq_init(void)
{
	dev_info_t *dip = ddi_root_node();
	ddi_softintr_t id;
	errorq_t *eqp;

	ASSERT(modrootloaded != 0);
	ASSERT(dip != NULL);

	mutex_enter(&errorq_lock);

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		ddi_iblock_cookie_t ibc =
		    (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);

		if (eqp->eq_id != NULL)
			continue; /* softint already initialized */

		if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
		    errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
			panic("errorq_init: failed to register IPL %u softint "
			    "for queue %s", eqp->eq_ipl, eqp->eq_name);
		}

		eqp->eq_id = id;
		errorq_drain(eqp);
	}

	mutex_exit(&errorq_lock);
}

/*
 * This function is designed to be called from panic context only, and
 * therefore does not need to acquire errorq_lock when iterating over
 * errorq_list.  This function must be called no more than once for each
 * 'what' value (if you change this then review the manipulation of 'dep').
 */
static uint64_t
errorq_panic_drain(uint_t what)
{
	errorq_elem_t *eep, *nep, *dep;
	errorq_t *eqp;
	uint64_t loggedtmp;
	uint64_t logged = 0;

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
			continue; /* do not drain this queue on this pass */

		loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;

		/*
		 * In case (1B) above, eq_ptail may be set but the
		 * atomic_cas_ptr may not have been executed yet or may have
		 * failed.  Either way, we must log errors in chronological
		 * order.  So we search the pending list for the error
		 * pointed to by eq_ptail.  If it is found, we know that all
		 * subsequent errors are also still on the pending list, so
		 * just NULL out eq_ptail and let errorq_drain(), below,
		 * take care of the logging.
		 */
		for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
			if (eep == eqp->eq_ptail) {
				ASSERT(eqp->eq_phead == NULL);
				eqp->eq_ptail = NULL;
				break;
			}
		}

		/*
		 * In cases (1C) and (2) above, eq_ptail will be set to the
		 * newest error on the processing list but eq_phead will still
		 * be NULL.  We set the eqe_next pointers so we can iterate
		 * over the processing list in order from oldest error to the
		 * newest error.  We then set eq_phead to point to the oldest
		 * error and fall into the for-loop below.
		 */
		if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
			for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
			    eep = eep->eqe_prev)
				eep->eqe_prev->eqe_next = eep;

			eqp->eq_phead = eep;
			eqp->eq_ptail = NULL;
		}

		/*
		 * In cases (3) and (4) above (or after case (1C/2) handling),
		 * eq_phead will be set to the oldest error on the processing
		 * list.  We log each error and return it to the free pool.
		 *
		 * Unlike errorq_drain(), we don't need to worry about updating
		 * eq_phead because errorq_panic() will be called at most once.
		 * However, we must use atomic operations to update the free
		 * pool bitmap in case errors are still being enqueued during
		 * the panic.
		 */
		for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
			eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
			eqp->eq_kstat.eqk_logged.value.ui64++;

			nep = eep->eqe_next;
			eep->eqe_next = NULL;

			/*
			 * On panic, we add the element to the dump list for
			 * each nvlist errorq, stored oldest to newest.  Then
			 * continue, so we don't free and subsequently
			 * overwrite any elements which we've put on the dump
			 * queue.
			 */
			if (eqp->eq_flags & ERRORQ_NVLIST) {
				if (eqp->eq_dump == NULL)
					dep = eqp->eq_dump = eep;
				else
					dep = dep->eqe_dump = eep;
				dep->eqe_dump = NULL;
				continue;
			}

			eep->eqe_prev = NULL;
			BT_ATOMIC_CLEAR(eqp->eq_bitmap, eep - eqp->eq_elems);
		}

		/*
		 * Now go ahead and drain any other errors on the pending list.
		 * This call transparently handles case (1A) above, as well as
		 * any other errors that were dispatched after errorq_drain()
		 * completed its first compare-and-swap.
		 */
		errorq_drain(eqp);

		logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
	}

	return (logged);
}

/*
 * Drain all error queues - called only from panic context.  Some drain
 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
 * must be drained last.  Drain ERRORQ_VITAL queues before nonvital queues
 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
 * do not drain the nonvital queues if there are many vital errors.
 */
void
errorq_panic(void)
{
	ASSERT(panicstr != NULL);

	if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
		(void) errorq_panic_drain(0);
	(void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
	(void) errorq_panic_drain(ERRORQ_NVLIST);
}

/*
 * Reserve an error queue element for later processing and dispatching.  The
 * element is returned to the caller, who may add error-specific data to
 * the element.  The element is returned to the free pool either after
 * errorq_commit() is called and the element has been processed, or
 * immediately when errorq_cancel() is called.
 */
errorq_elem_t *
errorq_reserve(errorq_t *eqp)
{
	errorq_elem_t *eqep;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_inc_64(&errorq_lost);
		return (NULL);
	}

	for (;;) {
		int i, rval;

		if ((i = errorq_availbit(eqp->eq_bitmap, eqp->eq_qlen,
		    eqp->eq_rotor)) == -1) {
			atomic_inc_64(&eqp->eq_kstat.eqk_dropped.value.ui64);
			return (NULL);
		}
		BT_ATOMIC_SET_EXCL(eqp->eq_bitmap, i, rval);
		if (rval == 0) {
			eqp->eq_rotor = i;
			eqep = &eqp->eq_elems[i];
			break;
		}
	}

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		errorq_nvelem_t *eqnp = eqep->eqe_data;
		nv_alloc_reset(eqnp->eqn_nva);
		eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
	}

	atomic_inc_64(&eqp->eq_kstat.eqk_reserved.value.ui64);
	return (eqep);
}

/*
 * Commit an errorq element (eqep) for dispatching.
 * This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
{
	errorq_elem_t *old;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_inc_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64);
		return;
	}

	for (;;) {
		old = eqp->eq_pend;
		eqep->eqe_prev = old;
		membar_producer();

		if (atomic_cas_ptr(&eqp->eq_pend, old, eqep) == old)
			break;
	}

	atomic_inc_64(&eqp->eq_kstat.eqk_committed.value.ui64);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Cancel an errorq element reservation by returning the specified element
 * to the free pool.  Duplicate or invalid frees are not supported.
 */
void
errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
{
	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
		return;

	BT_ATOMIC_CLEAR(eqp->eq_bitmap, eqep - eqp->eq_elems);

	atomic_inc_64(&eqp->eq_kstat.eqk_cancelled.value.ui64);
}

/*
 * Write elements on the dump list of each nvlist errorq to the dump device.
 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
 */
void
errorq_dump(void)
{
	errorq_elem_t *eep;
	errorq_t *eqp;

	if (ereport_dumpbuf == NULL)
		return; /* reboot or panic before errorq is even set up */

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
		    !(eqp->eq_flags & ERRORQ_ACTIVE))
			continue; /* do not dump this queue on panic */

		for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			size_t len = 0;
			erpt_dump_t ed;
			int err;

			(void) nvlist_size(eqnp->eqn_nvl,
			    &len, NV_ENCODE_NATIVE);

			if (len > ereport_dumplen || len == 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to size %lu\n",
				    eqp->eq_name, (void *)eep, len);
				continue;
			}

			if ((err = nvlist_pack(eqnp->eqn_nvl,
			    (char **)&ereport_dumpbuf, &ereport_dumplen,
			    NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to pack error %d\n",
				    eqp->eq_name, (void *)eep, err);
				continue;
			}

			ed.ed_magic = ERPT_MAGIC;
			ed.ed_chksum = checksum32(ereport_dumpbuf, len);
			ed.ed_size = (uint32_t)len;
			ed.ed_pad = 0;
			ed.ed_hrt_nsec = 0;
			ed.ed_hrt_base = panic_hrtime;
			ed.ed_tod_base.sec = panic_hrestime.tv_sec;
			ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;

			dumpvp_write(&ed, sizeof (ed));
			dumpvp_write(ereport_dumpbuf, len);
		}
	}
}

nvlist_t *
errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nvl);
}

nv_alloc_t *
errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nva);
}

/*
 * Reserve a new element and duplicate the data of the original into it.
 */
void *
errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
{
	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
	ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));

	if ((*neqep = errorq_reserve(eqp)) == NULL)
		return (NULL);

	bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
	return ((*neqep)->eqe_data);
}