/*-
 * Copyright (c) 2008 Isilon Systems, Inc.
 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
 * Copyright (c) 1998 Berkeley Software Design, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */
/*
 * Implementation of the `witness' lock verifier.  Originally implemented for
 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 *
 *	Main Entry: witness
 *	Pronunciation: 'wit-n&s
 *	Function: noun
 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
 *	    testimony, witness, from 2wit
 *	Date: before 12th century
 *	1 : attestation of a fact or event : TESTIMONY
 *	2 : one that gives evidence; specifically : one who testifies in
 *	    a cause or before a judicial tribunal
 *	3 : one asked to be present at a transaction so as to be able to
 *	    testify to its having taken place
 *	4 : one who has personal knowledge of something
 *	5 a : something serving as evidence or proof : SIGN
 *	  b : public affirmation by word or example of usually
 *	      religious faith or conviction <the heroic witness to divine
 *	      life>
 *	6 capitalized : a member of the Jehovah's Witnesses
 */
/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes.  Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 *    This rule is less obvious, but is a result of Giant providing the same
 *    semantics as spl().  Basically, when a thread sleeps, it must release
 *    Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence
 *    rule 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 *    This rule is also not quite as obvious.  Giant may be acquired after
 *    a sleepable lock because it is a non-sleepable lock and non-sleepable
 *    locks may always be acquired while holding a sleepable lock.  The second
 *    case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
 *    you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
 *    acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
 *    blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
 *    execute.  Thus, acquiring Giant both before and after a sleepable lock
 *    will not result in a lock order reversal.
 */
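
/*
 * A minimal illustrative sketch (not compiled in): assuming a hypothetical
 * sleepable sx lock "example_sx", both acquisition orders below are legal
 * under rules 2) and 3) above.
 */
#if 0
	sx_xlock(&example_sx);		/* sleepable lock first... */
	mtx_lock(&Giant);		/* ...Giant afterwards is fine (rule 3) */
	mtx_unlock(&Giant);
	sx_xunlock(&example_sx);

	mtx_lock(&Giant);		/* Giant first... */
	sx_xlock(&example_sx);		/* ...blocking here releases Giant (rule 2) */
	sx_xunlock(&example_sx);
	mtx_unlock(&Giant);
#endif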
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_stack.h"
#include "opt_witness.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/stack.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <machine/stdarg.h>

#if !defined(DDB) && !defined(STACK)
#error "DDB or STACK options are required for WITNESS"
#endif
/* Note that these traces do not work with KTR_ALQ. */
#if 0
#define	KTR_WITNESS	KTR_SUBSYS
#else
#define	KTR_WITNESS	0
#endif
#define	LI_RECURSEMASK	0x0000ffff	/* Recursion depth of lock instance. */
#define	LI_EXCLUSIVE	0x00010000	/* Exclusive lock instance. */

/* Define this to check for blessed mutexes */
#undef BLESSING

#define	WITNESS_COUNT		1024
#define	WITNESS_CHILDCOUNT	(WITNESS_COUNT * 4)
#define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
#define	WITNESS_PENDLIST	512

/* Allocate 256 KB of stack data space */
#define	WITNESS_LO_DATA_COUNT	2048

/* Prime, gives load factor of ~2 at full load */
#define	WITNESS_LO_HASH_SIZE	1021

/*
 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
 * will hold LOCK_NCHILDREN locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define	LOCK_NCHILDREN	5
#define	LOCK_CHILDCOUNT	2048
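
/*
 * Worked capacity arithmetic for the defaults above (illustrative): with
 * LOCK_CHILDCOUNT (2048) list entries of LOCK_NCHILDREN (5) instances each,
 * witness can track up to 2048 * 5 = 10240 concurrently held lock instances
 * across all threads and CPUs before witness_lock_list_get() reports
 * exhaustion.
 */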
#define	MAX_W_NAME	64

#define	BADSTACK_SBUF_SIZE	(256 * WITNESS_COUNT)
#define	CYCLEGRAPH_SBUF_SIZE	8192
#define	FULLGRAPH_SBUF_SIZE	32768
/*
 * These flags go in the witness relationship matrix and describe the
 * relationship between any two struct witness objects.
 */
#define	WITNESS_UNRELATED	0x00	/* No lock order relation. */
#define	WITNESS_PARENT		0x01	/* Parent, aka direct ancestor. */
#define	WITNESS_ANCESTOR	0x02	/* Direct or indirect ancestor. */
#define	WITNESS_CHILD		0x04	/* Child, aka direct descendant. */
#define	WITNESS_DESCENDANT	0x08	/* Direct or indirect descendant. */
#define	WITNESS_ANCESTOR_MASK	(WITNESS_PARENT | WITNESS_ANCESTOR)
#define	WITNESS_DESCENDANT_MASK	(WITNESS_CHILD | WITNESS_DESCENDANT)
#define	WITNESS_RELATED_MASK						\
	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
#define	WITNESS_REVERSAL	0x10	/* A lock order reversal has been
					 * observed. */
#define	WITNESS_RESERVED1	0x20	/* Unused flag, reserved. */
#define	WITNESS_RESERVED2	0x40	/* Unused flag, reserved. */
#define	WITNESS_LOCK_ORDER_KNOWN 0x80	/* This lock order is known. */

/* Descendant to ancestor flags */
#define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)

/* Ancestor to descendant flags */
#define	WITNESS_ATOD(x)	(((x) & WITNESS_RELATED_MASK) << 2)
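
/*
 * Worked example of the conversions above: a parent's row stores
 * WITNESS_PARENT (0x01) for its child, and WITNESS_ATOD(WITNESS_PARENT)
 * == 0x01 << 2 == WITNESS_CHILD (0x04), which is exactly what the child's
 * row stores for the parent.  Symmetrically, WITNESS_DTOA(WITNESS_CHILD)
 * == WITNESS_PARENT and WITNESS_DTOA(WITNESS_DESCENDANT) ==
 * WITNESS_ANCESTOR, which is how _isitmyx() cross-checks that the two
 * halves of w_rmatrix agree.
 */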
#define	WITNESS_INDEX_ASSERT(i)						\
	MPASS((i) > 0 && (i) <= w_max_used_index && (i) < WITNESS_COUNT)

MALLOC_DEFINE(M_WITNESS, "Witness", "Witness");
/*
 * Lock instances.  A lock instance is the data associated with a lock while
 * it is held by witness.  For example, a lock instance will hold the
 * recursion count of a lock.  Lock instances are held in lists.  Spin locks
 * are held in a per-cpu list while sleep locks are held in a per-thread list.
 */
struct lock_instance {
	struct lock_object	*li_lock;
	const char		*li_file;
	int			li_line;
	u_int			li_flags;
};
/*
 * A simple list type used to build the list of locks held by a thread
 * or CPU.  We can't simply embed the list in struct lock_object since a
 * lock may be held by more than one thread if it is a shared lock.  Locks
 * are added to the head of the list, so we fill up each list entry from
 * "the back" logically.  To ease some of the arithmetic, we actually fill
 * in each list entry the normal way (children[0] then children[1], etc.) but
 * when we traverse the list we read children[count-1] as the first entry
 * down to children[0] as the final entry.
 */
struct lock_list_entry {
	struct lock_list_entry	*ll_next;
	struct lock_instance	ll_children[LOCK_NCHILDREN];
	u_int			ll_count;
};
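
/*
 * Illustrative traversal sketch (not compiled in): given a populated list
 * "lle", newest-to-oldest order reads each entry's children from
 * ll_count - 1 down to 0 before following ll_next, mirroring the loops
 * used throughout this file.
 */
#if 0
	struct lock_list_entry *entry;
	int i;

	for (entry = lle; entry != NULL; entry = entry->ll_next)
		for (i = entry->ll_count - 1; i >= 0; i--)
			witness_list_lock(&entry->ll_children[i]);
#endif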
/*
 * The main witness structure.  One of these per named lock type in the system
 * (for example, "vnode interlock").
 */
struct witness {
	char			w_name[MAX_W_NAME];
	uint32_t		w_index;	/* Index in the relationship matrix */
	struct lock_class	*w_class;
	STAILQ_ENTRY(witness)	w_list;		/* List of all witnesses. */
	STAILQ_ENTRY(witness)	w_typelist;	/* Witnesses of a type. */
	struct witness		*w_hash_next;	/* Linked list in hash buckets. */
	const char		*w_file;	/* File where last acquired */
	uint32_t		w_line;		/* Line where last acquired */
	uint32_t		w_refcount;
	uint16_t		w_num_ancestors;   /* direct/indirect
						    * ancestor count */
	uint16_t		w_num_descendants; /* direct/indirect
						    * descendant count */
	int16_t			w_ddb_level;
	unsigned		w_displayed:1;
	unsigned		w_reversed:1;
};

STAILQ_HEAD(witness_list, witness);
/*
 * The witness hash table.  Keys are witness names (const char *), elements are
 * witness objects (struct witness *).
 */
struct witness_hash {
	struct witness	*wh_array[WITNESS_HASH_SIZE];
};
/*
 * Key type for the lock order data hash table.
 */
struct witness_lock_order_key {
	uint16_t	from;
	uint16_t	to;
};
struct witness_lock_order_data {
	struct stack			wlod_stack;
	struct witness_lock_order_key	wlod_key;
	struct witness_lock_order_data	*wlod_next;
};
/*
 * The witness lock order data hash table.  Keys are witness index tuples
 * (struct witness_lock_order_key), elements are lock order data objects
 * (struct witness_lock_order_data).
 */
struct witness_lock_order_hash {
	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
};
struct witness_blessed {
	const char	*b_lock1;
	const char	*b_lock2;
};

struct witness_pendhelp {
	const char		*wh_type;
	struct lock_object	*wh_lock;
};
struct witness_order_list_entry {
	const char		*w_name;
	struct lock_class	*w_class;
};
/*
 * Returns 0 if one of the locks is a spin lock and the other is not.
 * Returns 1 otherwise.
 */
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{

	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
	    (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}
static __inline int
witness_lock_order_key_empty(const struct witness_lock_order_key *key)
{

	return (key->from == 0 && key->to == 0);
}
static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
    const struct witness_lock_order_key *b)
{

	return (a->from == b->from && a->to == b->to);
}
static int	_isitmyx(struct witness *w1, struct witness *w2, int rmask,
		    const char *fname);
static void	_witness_debugger(int cond, const char *msg);
static void	adopt(struct witness *parent, struct witness *child);
static int	blessed(struct witness *, struct witness *);
static void	depart(struct witness *w);
static struct witness	*enroll(const char *description,
			    struct lock_class *lock_class);
static struct lock_instance	*find_instance(struct lock_list_entry *list,
				    struct lock_object *lock);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent,
		    struct witness *child);
static void	itismychild(struct witness *parent, struct witness *child);
static int	sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS);
static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
static int	sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS);
static void	witness_add_fullgraph(struct sbuf *sb, struct witness *parent);
static void	witness_ddb_compute_levels(void);
static void	witness_ddb_display(void(*)(const char *fmt, ...));
static void	witness_ddb_display_descendants(void(*)(const char *fmt, ...),
		    struct witness *, int indent);
static void	witness_ddb_display_list(void(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_ddb_level_descendants(struct witness *parent, int l);
static void	witness_ddb_list(struct thread *td);
static void	witness_free(struct witness *m);
static struct witness	*witness_get(void);
static uint32_t	witness_hash_djb2(const uint8_t *key, uint32_t size);
static struct witness	*witness_hash_get(const char *key);
static void	witness_hash_put(struct witness *w);
static void	witness_init_hash_tables(void);
static void	witness_increment_graph_generation(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct lock_list_entry	*witness_lock_list_get(void);
static int	witness_lock_order_add(struct witness *parent,
		    struct witness *child);
static int	witness_lock_order_check(struct witness *parent,
		    struct witness *child);
static struct witness_lock_order_data	*witness_lock_order_get(
					    struct witness *parent,
					    struct witness *child);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef KDB
#define	witness_debugger(c)	_witness_debugger(c, __func__)
#else
#define	witness_debugger(c)
#endif
SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking");
/*
 * If set to 0, witness is disabled.  Otherwise witness performs full lock
 * order checking for all locks.  At runtime, witness may be turned off,
 * but it may not be turned back on once it has been turned off.
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness.watch", &witness_watch);
SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");
/*
 * When KDB is enabled and witness_kdb is 1, it will cause the system
 * to drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_KDB
int	witness_kdb = 1;
#else
int	witness_kdb = 0;
#endif
TUNABLE_INT("debug.witness.kdb", &witness_kdb);
SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, "");
/*
 * When KDB is enabled and witness_trace is 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness.trace", &witness_trace);
SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RW, &witness_trace, 0, "");
#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness.skipspin", &witness_skipspin);
SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin,
    0, "");
/*
 * Call this to print out the relations between locks.
 */
SYSCTL_PROC(_debug_witness, OID_AUTO, fullgraph, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_debug_witness_fullgraph, "A", "Show locks relation graphs");

/*
 * Call this to print out the witness faulty stacks.
 */
SYSCTL_PROC(_debug_witness, OID_AUTO, badstacks, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_debug_witness_badstacks, "A", "Show bad witness stacks");
static struct mtx w_mtx;

static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);

static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);

static struct lock_list_entry *w_lock_list_free = NULL;
static struct witness_pendhelp pending_locks[WITNESS_PENDLIST];
static u_int pending_cnt;

static int w_free_cnt, w_spin_cnt, w_sleep_cnt;
SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, "");
SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, "");
SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0,
    "");
static struct witness *w_data;
static uint8_t w_rmatrix[WITNESS_COUNT+1][WITNESS_COUNT+1];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];
static struct witness_hash w_hash;	/* The witness hash table. */

/* The lock order data hash */
static struct witness_lock_order_data w_lodata[WITNESS_LO_DATA_COUNT];
static struct witness_lock_order_data *w_lofree = NULL;
static struct witness_lock_order_hash w_lohash;
static int w_max_used_index = 0;
static unsigned int w_generation = 0;
static const char *w_notrunning = "Witness not running\n";
static const char *w_stillcold = "Witness is still cold\n";
static struct witness_order_list_entry order_lists[] = {
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ "allprison", &lock_class_sx },
	{ "Giant", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_rw },
	{ "pmc-sleep", &lock_class_mtx_sleep },
	{ "accept", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "sellck", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "radix node head", &lock_class_mtx_sleep },
	{ "rtentry", &lock_class_mtx_sleep },
	{ "ifaddr", &lock_class_mtx_sleep },
	/*
	 * Multicast - protocol locks before interface locks, after UDP locks.
	 */
	{ "udpinp", &lock_class_rw },
	{ "in_multi_mtx", &lock_class_mtx_sleep },
	{ "igmp_mtx", &lock_class_mtx_sleep },
	{ "if_addr_mtx", &lock_class_mtx_sleep },
	/*
	 * UNIX Domain Sockets
	 */
	{ "unp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "udp", &lock_class_rw },
	{ "udpinp", &lock_class_rw },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "tcp", &lock_class_rw },
	{ "tcpinp", &lock_class_rw },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "slip_mtx", &lock_class_mtx_sleep },
	{ "slip sc_mtx", &lock_class_mtx_sleep },
	{ "ddp_list_mtx", &lock_class_mtx_sleep },
	{ "ddp_mtx", &lock_class_mtx_sleep },
	{ "bpf global lock", &lock_class_mtx_sleep },
	{ "bpf interface lock", &lock_class_mtx_sleep },
	{ "bpf cdev lock", &lock_class_mtx_sleep },
	{ "nfsd_mtx", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "802.11 com lock", &lock_class_mtx_sleep },
	{ "network driver", &lock_class_mtx_sleep },
	{ "ng_node", &lock_class_mtx_sleep },
	{ "ng_worklist", &lock_class_mtx_sleep },
	{ "system map", &lock_class_mtx_sleep },
	{ "vm page queue mutex", &lock_class_mtx_sleep },
	{ "vnode interlock", &lock_class_mtx_sleep },
	{ "cdev", &lock_class_mtx_sleep },
	/*
	 * kqueue/VFS interaction
	 */
	{ "kqueue", &lock_class_mtx_sleep },
	{ "struct mount mtx", &lock_class_mtx_sleep },
	{ "vnode interlock", &lock_class_mtx_sleep },
	{ "ap boot", &lock_class_mtx_spin },
	{ "rm.mutex_mtx", &lock_class_mtx_spin },
	{ "sio", &lock_class_mtx_spin },
	{ "scrlock", &lock_class_mtx_spin },
	{ "cy", &lock_class_mtx_spin },
	{ "pcib_mtx", &lock_class_mtx_spin },
	{ "rtc_mtx", &lock_class_mtx_spin },
	{ "scc_hwmtx", &lock_class_mtx_spin },
	{ "uart_hwmtx", &lock_class_mtx_spin },
	{ "fast_taskqueue", &lock_class_mtx_spin },
	{ "intr table", &lock_class_mtx_spin },
	{ "pmc-per-proc", &lock_class_mtx_spin },
	{ "process slock", &lock_class_mtx_spin },
	{ "sleepq chain", &lock_class_mtx_spin },
	{ "umtx lock", &lock_class_mtx_spin },
	{ "rm_spinlock", &lock_class_mtx_spin },
	{ "turnstile chain", &lock_class_mtx_spin },
	{ "turnstile lock", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "td_contested", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	{ "entropy harvest mutex", &lock_class_mtx_spin },
	{ "syscons video lock", &lock_class_mtx_spin },
	{ "time lock", &lock_class_mtx_spin },
	{ "smp rendezvous", &lock_class_mtx_spin },
	{ "tlb0", &lock_class_mtx_spin },
	{ "intrcnt", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#if defined(SMP) && defined(__sparc64__)
	{ "ipi", &lock_class_mtx_spin },
#endif
	{ "allpmaps", &lock_class_mtx_spin },
	{ "descriptor tables", &lock_class_mtx_spin },
	{ "clk", &lock_class_mtx_spin },
	{ "cpuset", &lock_class_mtx_spin },
	{ "mprof lock", &lock_class_mtx_spin },
	{ "zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
	{ "MCA spin lock", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
	{ "pcicfg", &lock_class_mtx_spin },
	{ "NDIS thread lock", &lock_class_mtx_spin },
#endif
	{ "tw_osl_io_lock", &lock_class_mtx_spin },
	{ "tw_osl_q_lock", &lock_class_mtx_spin },
	{ "tw_cl_io_lock", &lock_class_mtx_spin },
	{ "tw_cl_intr_lock", &lock_class_mtx_spin },
	{ "tw_cl_gen_lock", &lock_class_mtx_spin },
	{ "pmc-leaf", &lock_class_mtx_spin },
	{ "blocked lock", &lock_class_mtx_spin },
	{ NULL, NULL },
	{ NULL, NULL }
};
/*
 * Pairs of locks which have been blessed.  Don't complain about order
 * problems with blessed locks.
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
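
/*
 * Illustrative sketch (not compiled in): a blessed entry pairs two witness
 * names whose lock order reversals should be ignored; the names below are
 * hypothetical.
 */
#if 0
static struct witness_blessed blessed_list[] = {
	{ "vnode interlock", "cdev" },
};
#endif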
/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * This global is set to 1 once the static lock orders have been enrolled
 * so that a warning can be issued for any spin locks enrolled later.
 */
static int witness_spin_warn = 0;
/*
 * The WITNESS-enabled diagnostic code.  Note that the witness code does
 * assume that the early boot is single-threaded at least until after this
 * routine is completed.
 */
static void
witness_initialize(void *dummy __unused)
{
	struct lock_object *lock;
	struct witness_order_list_entry *order;
	struct witness *w, *w1;
	int i;

	MALLOC(w_data, struct witness *,
	    sizeof (struct witness) * WITNESS_COUNT, M_WITNESS,
	    M_NOWAIT | M_ZERO);
	/*
	 * We have to release Giant before initializing its witness
	 * structure so that WITNESS doesn't get confused.
	 */
	mtx_unlock(&Giant);
	mtx_assert(&Giant, MA_NOTOWNED);

	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
	    MTX_NOWITNESS | MTX_NOPROFILE);
	for (i = WITNESS_COUNT - 1; i >= 0; i--) {
		w = &w_data[i];
		memset(w, 0, sizeof(*w));
		w_data[i].w_index = i;	/* Witness index never changes. */
		witness_free(w);
	}
	KASSERT(STAILQ_FIRST(&w_free)->w_index == 0,
	    ("%s: Invalid list of free witness objects", __func__));

	/* Witness with index 0 is not used, to aid in debugging. */
	STAILQ_REMOVE_HEAD(&w_free, w_list);

	memset(w_rmatrix, 0,
	    (sizeof(**w_rmatrix) * (WITNESS_COUNT+1) * (WITNESS_COUNT+1)));

	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);
	witness_init_hash_tables();
	/* First add in all the specified order lists. */
	for (order = order_lists; order->w_name != NULL; order++) {
		w = enroll(order->w_name, order->w_class);
		if (w == NULL)
			continue;
		w->w_file = "order list";
		for (order++; order->w_name != NULL; order++) {
			w1 = enroll(order->w_name, order->w_class);
			if (w1 == NULL)
				continue;
			w1->w_file = "order list";
			itismychild(w, w1);
			w = w1;
		}
	}
	witness_spin_warn = 1;
	/* Iterate through all locks and add them to witness. */
	for (i = 0; pending_locks[i].wh_lock != NULL; i++) {
		lock = pending_locks[i].wh_lock;
		KASSERT(lock->lo_flags & LO_WITNESS,
		    ("%s: lock %s is on pending list but not LO_WITNESS",
		    __func__, lock->lo_name));
		lock->lo_witness = enroll(pending_locks[i].wh_type,
		    LOCK_CLASS(lock));
	}

	/* Mark the witness code as being ready for use. */
	witness_cold = 0;

	mtx_lock(&Giant);
}
SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize,
    NULL);
void
witness_init(struct lock_object *lock, const char *type)
{
	struct lock_class *class;

	/* Various sanity checks. */
	class = LOCK_CLASS(lock);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable", __func__,
		    class->lc_name, lock->lo_name);

	/*
	 * If we shouldn't watch this lock, then just clear lo_witness.
	 * Otherwise, if witness_cold is set, then it is too early to
	 * enroll this lock, so defer it to witness_initialize() by adding
	 * it to the pending_locks list.  If it is not too early, then enroll
	 * the lock now.
	 */
	if (witness_watch < 1 || panicstr != NULL ||
	    (lock->lo_flags & LO_WITNESS) == 0)
		lock->lo_witness = NULL;
	else if (witness_cold) {
		pending_locks[pending_cnt].wh_lock = lock;
		pending_locks[pending_cnt++].wh_type = type;
		if (pending_cnt > WITNESS_PENDLIST)
			panic("%s: pending locks list is too small, bump it\n",
			    __func__);
	} else
		lock->lo_witness = enroll(type, class);
}
void
witness_destroy(struct lock_object *lock)
{
	struct lock_class *class;
	struct witness *w;

	class = LOCK_CLASS(lock);
	if (witness_cold)
		panic("lock (%s) %s destroyed while witness_cold",
		    class->lc_name, lock->lo_name);

	/* XXX: need to verify that no one holds the lock */
	if ((lock->lo_flags & LO_WITNESS) == 0 || lock->lo_witness == NULL)
		return;
	w = lock->lo_witness;

	mtx_lock_spin(&w_mtx);
	MPASS(w->w_refcount > 0);
	w->w_refcount--;
	if (w->w_refcount == 0)
		depart(w);
	mtx_unlock_spin(&w_mtx);
}
static void
witness_ddb_compute_levels(void)
{
	struct witness *w;

	/*
	 * First clear all levels.
	 */
	STAILQ_FOREACH(w, &w_all, w_list)
		w->w_ddb_level = -1;

	/*
	 * Look for locks with no parents and level all their descendants.
	 */
	STAILQ_FOREACH(w, &w_all, w_list) {

		/* If the witness has ancestors (is not a root), skip it. */
		if (w->w_num_ancestors > 0)
			continue;
		witness_ddb_level_descendants(w, 0);
	}
}
static void
witness_ddb_level_descendants(struct witness *w, int l)
{
	int i;

	if (w->w_ddb_level >= l)
		return;

	w->w_ddb_level = l;
	l++;

	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_level_descendants(&w_data[i], l);
	}
}
static void
witness_ddb_display_descendants(void(*prnt)(const char *fmt, ...),
    struct witness *w, int indent)
{
	int i;

	for (i = 0; i < indent; i++)
		prnt(" ");
	prnt("%s (type: %s, depth: %d, active refs: %d)",
	    w->w_name, w->w_class->lc_name,
	    w->w_ddb_level, w->w_refcount);
	if (w->w_displayed) {
		prnt(" -- (already displayed)\n");
		return;
	}
	w->w_displayed = 1;
	if (w->w_file != NULL && w->w_line != 0)
		prnt(" -- last acquired @ %s:%d\n", w->w_file,
		    w->w_line);
	else
		prnt(" -- never acquired\n");
	WITNESS_INDEX_ASSERT(w->w_index);
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_display_descendants(prnt, &w_data[i],
			    indent + 1);
	}
}
static void
witness_ddb_display_list(void(*prnt)(const char *fmt, ...),
    struct witness_list *list)
{
	struct witness *w;

	STAILQ_FOREACH(w, list, w_typelist) {
		if (w->w_file == NULL || w->w_ddb_level > 0)
			continue;

		/* This lock has no ancestors - display its descendants. */
		witness_ddb_display_descendants(prnt, w, 0);
	}
}
static void
witness_ddb_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
	witness_ddb_compute_levels();

	/* Clear all the displayed flags. */
	STAILQ_FOREACH(w, &w_all, w_list)
		w->w_displayed = 0;

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_ddb_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_ddb_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_file != NULL || w->w_refcount == 0)
			continue;
		prnt("%s (type: %s, depth: %d)\n", w->w_name,
		    w->w_class->lc_name, w->w_ddb_level);
	}
}
/* Trim useless garbage from filenames. */
static const char *
fixup_filename(const char *file)
{

	if (file == NULL)
		return (NULL);
	while (strncmp(file, "../", 3) == 0)
		file += 3;
	return (file);
}
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch == -1 || panicstr != NULL)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	mtx_assert(&w_mtx, MA_NOTOWNED);
	mtx_lock_spin(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (witness_watch &&
	    isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_unlock_spin(&w_mtx);
		return (EDOOFUS);
	}

	/* Try to add the new order. */
	CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
	    lock2->lo_witness->w_name, lock1->lo_witness->w_name);
	itismychild(lock1->lo_witness, lock2->lo_witness);
	mtx_unlock_spin(&w_mtx);
	return (0);
}
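
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * subsystem can pin an explicit order between two of its own witnessed
 * locks at initialization time; "sc" and its mutexes are assumptions.
 */
#if 0
	if (witness_defineorder(&sc->sc_cfg_mtx.lock_object,
	    &sc->sc_io_mtx.lock_object) != 0)
		printf("order already established the other way around\n");
#endif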
void
witness_checkorder(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *lock1, *lock2;
	struct lock_class *class;
	struct witness *w, *w1;
	struct thread *td;
	int i, j;

	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;

	w = lock->lo_witness;
	class = LOCK_CLASS(lock);
	td = curthread;
	file = fixup_filename(file);

	if (class->lc_flags & LC_SLEEPLOCK) {

		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * spin locks.
		 */
		if (td->td_critnest != 0 && !kdb_active)
			panic("blockable sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);

		/*
		 * If this is the first lock acquired then just return as
		 * no order checking is needed.
		 */
		if (td->td_sleeplocks == NULL)
			return;
		lock_list = &td->td_sleeplocks;
	} else {

		/*
		 * If this is the first lock, just return as no order
		 * checking is needed.  We check this in both if clauses
		 * here as unifying the check would require us to use a
		 * critical section to ensure we don't migrate while doing
		 * the check.  Note that if this is not the first lock, we
		 * are already in a critical section and are safe for the
		 * rest of the check.
		 */
		if (PCPU_GET(spinlocks) == NULL)
			return;
		lock_list = PCPU_PTR(spinlocks);
	}

	/* Empty list? */
	if ((*lock_list)->ll_count == 0)
		return;
	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(*lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("shared lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while exclusively locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("share->excl");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("exclusive lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while share locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("excl->share");
		}
		return;
	}
	/*
	 * Try to perform most checks without a lock.  If this succeeds we
	 * can skip acquiring the lock and return success.
	 */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	w1 = lock1->li_lock->lo_witness;
	if (witness_lock_order_check(w1, w))
		return;
	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	mtx_lock_spin(&w_mtx);
	witness_lock_order_add(w1, w);
	if (w1 == w) {
		i = w->w_index;
		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
		    !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
			w_rmatrix[i][i] |= WITNESS_REVERSAL;
			w->w_reversed = 1;
			mtx_unlock_spin(&w_mtx);
			printf("acquiring duplicate lock of same type: \"%s\"\n",
			    w->w_name);
			printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
			    lock1->li_file, lock1->li_line);
			printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
			witness_debugger(1);
		} else
			mtx_unlock_spin(&w_mtx);
		return;
	}
	mtx_assert(&w_mtx, MA_OWNED);
	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmychild(w1, w))
		goto out;
	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			MPASS(j < WITNESS_COUNT);
			lock1 = &lle->ll_children[i];
			w1 = lock1->li_lock->lo_witness;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			if (w1 == NULL) {
				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
				    ("lock missing witness structure"));
				continue;
			}

			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock == &Giant.lock_object)
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock1->li_lock == &Giant.lock_object)
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				goto reversal;

			/*
			 * If we are locking Giant and this is a non-sleepable
			 * lock, then treat it as a reversal.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
			    lock == &Giant.lock_object)
				goto reversal;

			/*
			 * Check the lock order hierarchy for a reversal.
			 */
			if (!isitmydescendant(w, w1))
				continue;
		reversal:
			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */

			/*
			 * If the lock order is blessed, just bail.  We don't
			 * look for other lock order violations though, which
			 * may be a bug.
			 */
			if (blessed(w, w1))
				goto out;

			/* Bail if this violation is known */
			if (w_rmatrix[w1->w_index][w->w_index] & WITNESS_REVERSAL)
				goto out;

			/* Record this as a violation */
			w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL;
			w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL;
			w->w_reversed = w1->w_reversed = 1;
			witness_increment_graph_generation();
			mtx_unlock_spin(&w_mtx);
			/*
			 * Ok, yell about it.
			 */
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				printf(
		"lock order reversal: (sleepable after non-sleepable)\n");
			else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0
			    && lock == &Giant.lock_object)
				printf(
		"lock order reversal: (Giant after non-sleepable)\n");
			else
				printf("lock order reversal:\n");

			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.
			 */
			do {
				lock2 = &lle->ll_children[i];
				MPASS(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					MPASS(i >= 0 && i < LOCK_NCHILDREN);
				} else
					i--;
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_name, lock1->li_file, lock1->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, w->w_name, file, line);
			} else {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_witness->w_name,
				    lock2->li_file, lock2->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_name, lock1->li_file, lock1->li_line);
				printf(" 3rd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, w->w_name, file, line);
			}
			witness_debugger(1);
			return;
		}
	}
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];

	/*
	 * If requested, build a new lock order.  However, don't build a new
	 * relationship between a sleepable lock and Giant if it is in the
	 * wrong direction.  The correct lock order is that sleepable locks
	 * always come before Giant.
	 */
	if (flags & LOP_NEWORDER &&
	    !(lock1->li_lock == &Giant.lock_object &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0)) {
		CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
		    w->w_name, lock1->li_lock->lo_witness->w_name);
		itismychild(lock1->li_lock->lo_witness, w);
	}
out:
	mtx_unlock_spin(&w_mtx);
}
void
witness_lock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct witness *w;
	struct thread *td;

	if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	w = lock->lo_witness;
	td = curthread;
	file = fixup_filename(file);

	/* Determine lock list for this lock. */
	if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK)
		lock_list = &td->td_sleeplocks;
	else
		lock_list = PCPU_PTR(spinlocks);

	/* Check to see if we are recursing on a lock we already own. */
	instance = find_instance(*lock_list, lock);
	if (instance != NULL) {
		instance->li_flags++;
		CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__,
		    td->td_proc->p_pid, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK);
		instance->li_file = file;
		instance->li_line = line;
		return;
	}

	/* Update per-witness last file and line acquire. */
	w->w_file = file;
	w->w_line = line;

	/* Find the next open lock instance in the list and fill it. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		if (lle == NULL)
			return;
		lle->ll_next = *lock_list;
		CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
		    td->td_proc->p_pid, lle);
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	instance->li_line = line;
	instance->li_file = file;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
	    td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
}
void
witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL)
		return;
	class = LOCK_CLASS(lock);
	file = fixup_filename(file);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
	}
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("upgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("upgrade of exclusive lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK, file, line);
	}
	instance->li_flags |= LI_EXCLUSIVE;
}
void
witness_downgrade(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL)
		return;
	class = LOCK_CLASS(lock);
	file = fixup_filename(file);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
	}
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("downgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("downgrade of shared lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK, file, line);
	}
	instance->li_flags &= ~LI_EXCLUSIVE;
}
void
witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct thread *td;
	int i, j;

	if (witness_cold || lock->lo_witness == NULL || panicstr != NULL)
		return;
	td = curthread;
	class = LOCK_CLASS(lock);
	file = fixup_filename(file);

	/* Find lock instance associated with this lock. */
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = &td->td_sleeplocks;
	else
		lock_list = PCPU_PTR(spinlocks);
	lle = *lock_list;
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock)
				goto found;
		}

	/*
	 * When disabling WITNESS through witness_watch we could end up
	 * having registered locks in the td_sleeplocks queue.
	 * We have to make sure we flush these queues, so just search for
	 * any remaining registered locks and remove them.
	 */
	if (witness_watch > 0)
		panic("lock (%s) %s not locked @ %s:%d", class->lc_name,
		    lock->lo_name, file, line);
	else
		return;
found:

	/* First, check for shared/exclusive mismatches. */
	if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) == 0) {
		printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name,
		    lock->lo_name, file, line);
		printf("while exclusively locked from %s:%d\n",
		    instance->li_file, instance->li_line);
		panic("excl->ushare");
	}
	if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) != 0) {
		printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name,
		    lock->lo_name, file, line);
		printf("while share locked from %s:%d\n", instance->li_file,
		    instance->li_line);
		panic("share->uexcl");
	}

	/* If we are recursed, unrecurse. */
	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
		CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__,
		    td->td_proc->p_pid, instance->li_lock->lo_name,
		    instance->li_flags);
		instance->li_flags--;
		return;
	}

	/* Otherwise, remove this item from the list. */
	CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__,
	    td->td_proc->p_pid, instance->li_lock->lo_name,
	    (*lock_list)->ll_count - 1);
	for (j = i; j < (*lock_list)->ll_count - 1; j++)
		(*lock_list)->ll_children[j] =
		    (*lock_list)->ll_children[j + 1];
	(*lock_list)->ll_count--;

	/*
	 * If this lock list entry is not the first and is now empty, free it.
	 */
	if (*lock_list != lle && (*lock_list)->ll_count == 0) {
		lle = *lock_list;
		*lock_list = lle->ll_next;
		CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__,
		    td->td_proc->p_pid, lle);
		witness_lock_list_free(lle);
	}
}
void
witness_thread_exit(struct thread *td)
{
	struct lock_list_entry *lle;
	int i, n;

	lle = td->td_sleeplocks;
	if (lle == NULL || panicstr != NULL)
		return;
	if (lle->ll_count != 0) {
		for (n = 0; lle != NULL; lle = lle->ll_next)
			for (i = lle->ll_count - 1; i >= 0; i--) {
				if (n == 0)
		printf("Thread %p exiting with the following locks held:\n",
					    td);
				n++;
				witness_list_lock(&lle->ll_children[i]);
			}
		panic("Thread %p cannot exit while holding sleeplocks\n", td);
	}
	witness_lock_list_free(lle);
}
/*
 * Warn if any locks other than 'lock' are held.  Flags can be passed in to
 * exempt Giant and sleepable locks from the checks as well.  If any
 * non-exempt locks are held, then a supplied message is printed to the
 * console along with a list of the offending locks.  If indicated in the
 * flags then a failure results in a panic as well.
 */
int
witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *lock1;
	struct thread *td;
	va_list ap;
	int i, n;

	if (witness_cold || witness_watch < 1 || panicstr != NULL)
		return (0);
	n = 0;
	td = curthread;
	for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			lock1 = &lle->ll_children[i];
			if (lock1->li_lock == lock)
				continue;
			if (flags & WARN_GIANTOK &&
			    lock1->li_lock == &Giant.lock_object)
				continue;
			if (flags & WARN_SLEEPOK &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0)
				continue;
			if (n == 0) {
				va_start(ap, fmt);
				vprintf(fmt, ap);
				va_end(ap);
				printf(" with the following");
				if (flags & WARN_SLEEPOK)
					printf(" non-sleepable");
				printf(" locks held:\n");
			}
			n++;
			witness_list_lock(lock1);
		}
	if (PCPU_GET(spinlocks) != NULL) {
		lock_list = PCPU_PTR(spinlocks);

		/* Empty list? */
		if ((*lock_list)->ll_count == 0)
			return (n);

		/*
		 * Since we already hold a spinlock preemption is
		 * not an issue here.
		 */
		lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
		if (n == 0 && lock1->li_lock == lock)
			return (0);
		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
		printf(" with the following");
		if (flags & WARN_SLEEPOK)
			printf(" non-sleepable");
		printf(" locks held:\n");
		n += witness_list_locks(PCPU_PTR(spinlocks));
	}
	if (flags & WARN_PANIC && n)
		panic("%s", __func__);
	else
		witness_debugger(n);
	return (n);
}
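
/*
 * Illustrative sketch (hypothetical call site): the sleep path can use
 * witness_warn() to panic if any non-exempt lock is still held; "wmesg"
 * is an assumed variable.
 */
#if 0
	witness_warn(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
	    "sleeping on \"%s\"", wmesg);
#endif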
const char *
witness_file(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL)
		return ("?");
	w = lock->lo_witness;
	return (w->w_file);
}

int
witness_line(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL)
		return (0);
	w = lock->lo_witness;
	return (w->w_line);
}
static struct witness *
enroll(const char *description, struct lock_class *lock_class)
{
	struct witness *w;
	struct witness_list *typelist;

	MPASS(description != NULL);

	if (witness_watch == -1 || panicstr != NULL)
		return (NULL);
	if ((lock_class->lc_flags & LC_SPINLOCK)) {
		if (witness_skipspin)
			return (NULL);
		else
			typelist = &w_spin;
	} else if ((lock_class->lc_flags & LC_SLEEPLOCK))
		typelist = &w_sleep;
	else
		panic("lock class %s is not sleep or spin",
		    lock_class->lc_name);

	mtx_lock_spin(&w_mtx);
	w = witness_hash_get(description);
	if (w)
		goto found;
	if ((w = witness_get()) == NULL)
		return (NULL);
	MPASS(strlen(description) < MAX_W_NAME);
	strcpy(w->w_name, description);
	w->w_class = lock_class;
	w->w_refcount = 1;
	STAILQ_INSERT_HEAD(&w_all, w, w_list);
	if (lock_class->lc_flags & LC_SPINLOCK) {
		STAILQ_INSERT_HEAD(&w_spin, w, w_typelist);
		w_spin_cnt++;
	} else if (lock_class->lc_flags & LC_SLEEPLOCK) {
		STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist);
		w_sleep_cnt++;
	}

	/* Insert new witness into the hash */
	witness_hash_put(w);
	witness_increment_graph_generation();
	mtx_unlock_spin(&w_mtx);
	return (w);
found:
	w->w_refcount++;
	mtx_unlock_spin(&w_mtx);
	if (lock_class != w->w_class)
		panic(
		    "lock (%s) %s does not match earlier (%s) lock",
		    description, lock_class->lc_name,
		    w->w_class->lc_name);
	return (w);
}
static void
depart(struct witness *w)
{
	struct witness_list *list;

	MPASS(w->w_refcount == 0);
	if (w->w_class->lc_flags & LC_SLEEPLOCK) {
		list = &w_sleep;
		w_sleep_cnt--;
	} else {
		list = &w_spin;
		w_spin_cnt--;
	}
	/*
	 * Set file to NULL as it may point into a loadable module.
	 */
	w->w_file = NULL;
	w->w_line = 0;
	witness_increment_graph_generation();
}
static void
adopt(struct witness *parent, struct witness *child)
{
	int pi, ci, i, j;

	if (witness_cold == 0)
		mtx_assert(&w_mtx, MA_OWNED);

	/* If the relationship is already known, there's no work to be done. */
	if (isitmychild(parent, child))
		return;

	/* When the structure of the graph changes, bump up the generation. */
	witness_increment_graph_generation();

	/*
	 * The hard part ... create the direct relationship, then propagate all
	 * indirect relationships.
	 */
	pi = parent->w_index;
	ci = child->w_index;
	WITNESS_INDEX_ASSERT(pi);
	WITNESS_INDEX_ASSERT(ci);
	w_rmatrix[pi][ci] |= WITNESS_PARENT;
	w_rmatrix[ci][pi] |= WITNESS_CHILD;

	/*
	 * If parent was not already an ancestor of child,
	 * then we increment the descendant and ancestor counters.
	 */
	if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) {
		parent->w_num_descendants++;
		child->w_num_ancestors++;
	}

	/*
	 * Find each ancestor of 'pi'.  Note that 'pi' itself is counted as
	 * an ancestor of 'pi' during this loop.
	 */
	for (i = 1; i <= w_max_used_index; i++) {
		if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 &&
		    i != pi)
			continue;

		/* Find each descendant of 'i' and mark it as a descendant. */
		for (j = 1; j <= w_max_used_index; j++) {

			/*
			 * Skip children that are already marked as
			 * descendants of 'i'.
			 */
			if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK)
				continue;

			/*
			 * We are only interested in descendants of 'ci'.  Note
			 * that 'ci' itself is counted as a descendant of 'ci'.
			 */
			if ((w_rmatrix[ci][j] & WITNESS_ANCESTOR_MASK) == 0 &&
			    j != ci)
				continue;
			w_rmatrix[i][j] |= WITNESS_ANCESTOR;
			w_rmatrix[j][i] |= WITNESS_DESCENDANT;
			w_data[i].w_num_descendants++;
			w_data[j].w_num_ancestors++;

			/*
			 * Make sure we aren't marking a node as both an
			 * ancestor and descendant.  We should have caught
			 * this as a lock order reversal earlier.
			 */
			if ((w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[i][j] & WITNESS_DESCENDANT_MASK)) {
				printf("witness rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    i, j, w_rmatrix[i][j]);
				printf("Witness disabled.\n");
				witness_watch = -1;
			}
			if ((w_rmatrix[j][i] & WITNESS_ANCESTOR_MASK) &&
			    (w_rmatrix[j][i] & WITNESS_DESCENDANT_MASK)) {
				printf("witness rmatrix paradox! [%d][%d]=%d "
				    "both ancestor and descendant\n",
				    j, i, w_rmatrix[j][i]);
				printf("Witness disabled.\n");
				witness_watch = -1;
			}
		}
	}
}
static void
itismychild(struct witness *parent, struct witness *child)
{

	MPASS(child != NULL && parent != NULL);
	if (witness_cold == 0)
		mtx_assert(&w_mtx, MA_OWNED);

	if (!witness_lock_type_equal(parent, child)) {
		if (witness_cold == 0)
			mtx_unlock_spin(&w_mtx);
		panic("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
		    "the same lock type", __func__, parent->w_name,
		    parent->w_class->lc_name, child->w_name,
		    child->w_class->lc_name);
	}
	adopt(parent, child);
}
/*
 * Generic code for the isitmy*() functions.  The rmask parameter is the
 * expected relationship of w1 to w2.
 */
static int
_isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
{
	unsigned char r1, r2;
	int i1, i2;

	i1 = w1->w_index;
	i2 = w2->w_index;
	WITNESS_INDEX_ASSERT(i1);
	WITNESS_INDEX_ASSERT(i2);
	r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK;
	r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK;

	/* The flags on one better be the inverse of the flags on the other */
	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
		printf("%s: rmatrix mismatch between %s (index %d) and %s "
		    "(index %d): w_rmatrix[%d][%d] == %hhx but "
		    "w_rmatrix[%d][%d] == %hhx\n",
		    fname, w1->w_name, i1, w2->w_name, i2, i1, i2, r1,
		    i2, i1, r2);
		printf("Witness disabled.\n");
		witness_watch = -1;
	}
	return (r1 & rmask);
}
/*
 * Checks if @child is a direct child of @parent.
 */
static int
isitmychild(struct witness *parent, struct witness *child)
{

	return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
}

/*
 * Checks if @descendant is a direct or indirect descendant of @ancestor.
 */
static int
isitmydescendant(struct witness *ancestor, struct witness *descendant)
{

	return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
	    __func__));
}
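
/*
 * Worked example: after itismychild(A, B) and itismychild(B, C),
 * isitmychild(A, C) is false (C is not a direct child of A), while
 * isitmydescendant(A, C) is true via the WITNESS_ANCESTOR bit that
 * adopt() propagated into w_rmatrix.
 */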
static int
blessed(struct witness *w1, struct witness *w2)
{
	int i;
	struct witness_blessed *b;

	for (i = 0; i < blessed_count; i++) {
		b = &blessed_list[i];
		if (strcmp(w1->w_name, b->b_lock1) == 0) {
			if (strcmp(w2->w_name, b->b_lock2) == 0)
				return (1);
			continue;
		}
		if (strcmp(w1->w_name, b->b_lock2) == 0)
			if (strcmp(w2->w_name, b->b_lock1) == 0)
				return (1);
	}
	return (0);
}
static struct witness *
witness_get(void)
{
	struct witness *w;
	int index;

	if (witness_cold == 0)
		mtx_assert(&w_mtx, MA_OWNED);

	if (witness_watch == -1) {
		mtx_unlock_spin(&w_mtx);
		return (NULL);
	}
	if (STAILQ_EMPTY(&w_free)) {
		witness_watch = -1;
		mtx_unlock_spin(&w_mtx);
		printf("WITNESS: unable to allocate a new witness object\n");
		return (NULL);
	}
	w = STAILQ_FIRST(&w_free);
	STAILQ_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;
	index = w->w_index;
	MPASS(index > 0 && index == w_max_used_index+1 &&
	    index < WITNESS_COUNT);
	bzero(w, sizeof(*w));
	w->w_index = index;
	if (index > w_max_used_index)
		w_max_used_index = index;
	return (w);
}
static void
witness_free(struct witness *w)
{

	STAILQ_INSERT_HEAD(&w_free, w, w_list);
	w_free_cnt++;
}
static struct lock_list_entry *
witness_lock_list_get(void)
{
	struct lock_list_entry *lle;

	if (witness_watch == -1)
		return (NULL);
	mtx_lock_spin(&w_mtx);
	lle = w_lock_list_free;
	if (lle == NULL) {
		witness_watch = -1;
		mtx_unlock_spin(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w_lock_list_free = lle->ll_next;
	mtx_unlock_spin(&w_mtx);
	bzero(lle, sizeof(*lle));
	return (lle);
}
static void
witness_lock_list_free(struct lock_list_entry *lle)
{

	mtx_lock_spin(&w_mtx);
	lle->ll_next = w_lock_list_free;
	w_lock_list_free = lle;
	mtx_unlock_spin(&w_mtx);
}
static struct lock_instance *
find_instance(struct lock_list_entry *list, struct lock_object *lock)
{
	struct lock_list_entry *lle;
	struct lock_instance *instance;
	int i;

	for (lle = list; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			instance = &lle->ll_children[i];
			if (instance->li_lock == lock)
				return (instance);
		}
	return (NULL);
}
static void
witness_list_lock(struct lock_instance *instance)
{
	struct lock_object *lock;

	lock = instance->li_lock;
	printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
	    "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name);
	if (lock->lo_witness->w_name != lock->lo_name)
		printf(" (%s)", lock->lo_witness->w_name);
	printf(" r = %d (%p) locked @ %s:%d\n",
	    instance->li_flags & LI_RECURSEMASK, lock, instance->li_file,
	    instance->li_line);
}
int
witness_thread_has_locks(struct thread *td)
{

	return (td->td_sleeplocks != NULL);
}

int
witness_proc_has_locks(struct proc *p)
{
	struct thread *td;

	FOREACH_THREAD_IN_PROC(p, td) {
		if (witness_thread_has_locks(td))
			return (1);
	}
	return (0);
}
int
witness_list_locks(struct lock_list_entry **lock_list)
{
	struct lock_list_entry *lle;
	int i, nheld;

	nheld = 0;
	for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			witness_list_lock(&lle->ll_children[i]);
			nheld++;
		}
	return (nheld);
}
/*
 * This is a bit risky at best.  We call this function when we have timed
 * out acquiring a spin lock, and we assume that the other CPU is stuck
 * with this lock held.  So, we go groveling around in the other CPU's
 * per-cpu data to try to find the lock instance for this spin lock to
 * see when it was last acquired.
 */
void
witness_display_spinlock(struct lock_object *lock, struct thread *owner)
{
	struct lock_instance *instance;
	struct pcpu *pc;

	if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU)
		return;
	pc = pcpu_find(owner->td_oncpu);
	instance = find_instance(pc->pc_spinlocks, lock);
	if (instance != NULL)
		witness_list_lock(instance);
}
void
witness_save(struct lock_object *lock, const char **filep, int *linep)
{
	struct lock_list_entry *lock_list;
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL)
		return;
	class = LOCK_CLASS(lock);
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = curthread->td_sleeplocks;
	else {
		if (witness_skipspin)
			return;
		lock_list = PCPU_GET(spinlocks);
	}
	instance = find_instance(lock_list, lock);
	if (instance == NULL)
		panic("%s: lock (%s) %s not locked", __func__,
		    class->lc_name, lock->lo_name);
	*filep = instance->li_file;
	*linep = instance->li_line;
}
void
witness_restore(struct lock_object *lock, const char *file, int line)
{
	struct lock_list_entry *lock_list;
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == -1 || panicstr != NULL)
		return;
	class = LOCK_CLASS(lock);
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = curthread->td_sleeplocks;
	else {
		if (witness_skipspin)
			return;
		lock_list = PCPU_GET(spinlocks);
	}
	instance = find_instance(lock_list, lock);
	if (instance == NULL)
		panic("%s: lock (%s) %s not locked", __func__,
		    class->lc_name, lock->lo_name);
	lock->lo_witness->w_file = file;
	lock->lo_witness->w_line = line;
	instance->li_file = file;
	instance->li_line = line;
}
void
witness_assert(struct lock_object *lock, int flags, const char *file, int line)
{
#ifdef INVARIANT_SUPPORT
        struct lock_instance *instance;
        struct lock_class *class;

        if (lock->lo_witness == NULL || witness_watch < 1 || panicstr != NULL)
                return;
        class = LOCK_CLASS(lock);
        if ((class->lc_flags & LC_SLEEPLOCK) != 0)
                instance = find_instance(curthread->td_sleeplocks, lock);
        else if ((class->lc_flags & LC_SPINLOCK) != 0)
                instance = find_instance(PCPU_GET(spinlocks), lock);
        else
                panic("Lock (%s) %s is not sleep or spin!",
                    class->lc_name, lock->lo_name);
        file = fixup_filename(file);
        switch (flags) {
        case LA_UNLOCKED:
                if (instance != NULL)
                        panic("Lock (%s) %s locked @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                break;
        case LA_LOCKED:
        case LA_LOCKED | LA_RECURSED:
        case LA_LOCKED | LA_NOTRECURSED:
        case LA_SLOCKED:
        case LA_SLOCKED | LA_RECURSED:
        case LA_SLOCKED | LA_NOTRECURSED:
        case LA_XLOCKED:
        case LA_XLOCKED | LA_RECURSED:
        case LA_XLOCKED | LA_NOTRECURSED:
                if (instance == NULL) {
                        panic("Lock (%s) %s not locked @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                        break;
                }
                if ((flags & LA_XLOCKED) != 0 &&
                    (instance->li_flags & LI_EXCLUSIVE) == 0)
                        panic("Lock (%s) %s not exclusively locked @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                if ((flags & LA_SLOCKED) != 0 &&
                    (instance->li_flags & LI_EXCLUSIVE) != 0)
                        panic("Lock (%s) %s exclusively locked @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                if ((flags & LA_RECURSED) != 0 &&
                    (instance->li_flags & LI_RECURSEMASK) == 0)
                        panic("Lock (%s) %s not recursed @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                if ((flags & LA_NOTRECURSED) != 0 &&
                    (instance->li_flags & LI_RECURSEMASK) != 0)
                        panic("Lock (%s) %s recursed @ %s:%d.",
                            class->lc_name, lock->lo_name, file, line);
                break;
        default:
                panic("Invalid lock assertion at %s:%d.", file, line);
        }
#endif  /* INVARIANT_SUPPORT */
}
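
/*
 * With INVARIANT_SUPPORT compiled in, witness_assert() panics unless the
 * lock is in the asserted state.  A minimal usage sketch (hypothetical
 * helper; this is normally reached via the per-class assertion macros):
 */
#if 0
static void
example_assert_owned(struct mtx *m)
{

        /* Panic unless the current thread holds m exclusively. */
        witness_assert(&m->lock_object, LA_XLOCKED, __FILE__, __LINE__);
}
#endif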
#ifdef DDB
static void
witness_ddb_list(struct thread *td)
{

        KASSERT(witness_cold == 0, ("%s: witness_cold", __func__));
        KASSERT(kdb_active, ("%s: not in the debugger", __func__));

        if (witness_watch < 1)
                return;

        witness_list_locks(&td->td_sleeplocks);

        /*
         * We only handle spinlocks if td == curthread.  This is somewhat broken
         * if td is currently executing on some other CPU and holds spin locks
         * as we won't display those locks.  If we had a MI way of getting
         * the per-cpu data for a given cpu then we could use
         * td->td_oncpu to get the list of spinlocks for this thread
         * and "fix" this.
         *
         * That still wouldn't really fix this unless we locked the scheduler
         * lock or stopped the other CPU to make sure it wasn't changing the
         * list out from under us.  It is probably best to just not try to
         * handle threads on other CPU's for now.
         */
        if (td == curthread && PCPU_GET(spinlocks) != NULL)
                witness_list_locks(PCPU_PTR(spinlocks));
}
DB_SHOW_COMMAND(locks, db_witness_list)
{
        struct thread *td;

        if (have_addr)
                td = db_lookup_thread(addr, TRUE);
        else
                td = kdb_thread;
        witness_ddb_list(td);
}
DB_SHOW_COMMAND(alllocks, db_witness_list_all)
{
        struct thread *td;
        struct proc *p;

        /*
         * It would be nice to list only threads and processes that actually
         * held sleep locks, but that information is currently not exported
         * by WITNESS.
         */
        FOREACH_PROC_IN_SYSTEM(p) {
                if (!witness_proc_has_locks(p))
                        continue;
                FOREACH_THREAD_IN_PROC(p, td) {
                        if (!witness_thread_has_locks(td))
                                continue;
                        db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid,
                            td->td_name, td, td->td_tid);
                        witness_ddb_list(td);
                }
        }
}
DB_SHOW_COMMAND(witness, db_witness_display)
{

        witness_ddb_display(db_printf);
}
#endif /* DDB */
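
/*
 * From the kernel debugger, the commands registered above are invoked as
 * follows (illustrative session; the thread address is made up):
 *
 *	db> show locks
 *	db> show locks 0xc6780000
 *	db> show alllocks
 *	db> show witness
 */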
static int
sysctl_debug_witness_badstacks(SYSCTL_HANDLER_ARGS)
{
        struct witness_lock_order_data *data1, *data2, *tmp_data1, *tmp_data2;
        struct witness *tmp_w1, *tmp_w2, *w1, *w2;
        struct sbuf *sb;
        u_int w_rmatrix1, w_rmatrix2;
        int error, generation, i, j;

        if (witness_watch < 1) {
                error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
                return (error);
        }
        if (witness_cold) {
                error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold));
                return (error);
        }
        error = 0;
        sb = sbuf_new(NULL, NULL, BADSTACK_SBUF_SIZE, SBUF_AUTOEXTEND);
        if (sb == NULL)
                return (ENOMEM);

        /* Allocate and init temporary storage space. */
        tmp_w1 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
        tmp_w2 = malloc(sizeof(struct witness), M_TEMP, M_WAITOK | M_ZERO);
        tmp_data1 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
            M_WAITOK | M_ZERO);
        tmp_data2 = malloc(sizeof(struct witness_lock_order_data), M_TEMP,
            M_WAITOK | M_ZERO);
        stack_zero(&tmp_data1->wlod_stack);
        stack_zero(&tmp_data2->wlod_stack);

restart:
        mtx_lock_spin(&w_mtx);
        generation = w_generation;
        mtx_unlock_spin(&w_mtx);
        sbuf_printf(sb, "Number of known direct relationships is %d\n",
            w_lohash.wloh_count);
        for (i = 1; i < w_max_used_index; i++) {
                mtx_lock_spin(&w_mtx);
                if (generation != w_generation) {
                        mtx_unlock_spin(&w_mtx);

                        /* The graph has changed, try again. */
                        req->oldidx = 0;
                        sbuf_clear(sb);
                        goto restart;
                }

                w1 = &w_data[i];
                if (w1->w_reversed == 0) {
                        mtx_unlock_spin(&w_mtx);
                        continue;
                }

                /* Copy w1 locally so we can release the spin lock. */
                *tmp_w1 = *w1;
                mtx_unlock_spin(&w_mtx);

                if (tmp_w1->w_reversed == 0)
                        continue;
                for (j = 1; j < w_max_used_index; j++) {
                        if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j)
                                continue;

                        mtx_lock_spin(&w_mtx);
                        if (generation != w_generation) {
                                mtx_unlock_spin(&w_mtx);

                                /* The graph has changed, try again. */
                                req->oldidx = 0;
                                sbuf_clear(sb);
                                goto restart;
                        }

                        w2 = &w_data[j];
                        data1 = witness_lock_order_get(w1, w2);
                        data2 = witness_lock_order_get(w2, w1);

                        /*
                         * Copy information locally so we can release the
                         * spin lock.
                         */
                        *tmp_w2 = *w2;
                        w_rmatrix1 = (unsigned int)w_rmatrix[i][j];
                        w_rmatrix2 = (unsigned int)w_rmatrix[j][i];

                        if (data1) {
                                stack_zero(&tmp_data1->wlod_stack);
                                stack_copy(&data1->wlod_stack,
                                    &tmp_data1->wlod_stack);
                        }
                        if (data2 && data2 != data1) {
                                stack_zero(&tmp_data2->wlod_stack);
                                stack_copy(&data2->wlod_stack,
                                    &tmp_data2->wlod_stack);
                        }
                        mtx_unlock_spin(&w_mtx);

                        sbuf_printf(sb,
            "\nLock order reversal between \"%s\"(%s) and \"%s\"(%s)!\n",
                            tmp_w1->w_name, tmp_w1->w_class->lc_name,
                            tmp_w2->w_name, tmp_w2->w_class->lc_name);
                        sbuf_printf(sb,
            "w_rmatrix[%s][%s] == %x, w_rmatrix[%s][%s] == %x\n",
                            tmp_w1->w_name, tmp_w2->w_name, w_rmatrix1,
                            tmp_w2->w_name, tmp_w1->w_name, w_rmatrix2);
                        if (data1) {
                                sbuf_printf(sb,
                    "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
                                    tmp_w1->w_name, tmp_w1->w_class->lc_name,
                                    tmp_w2->w_name, tmp_w2->w_class->lc_name);
                                stack_sbuf_print(sb, &tmp_data1->wlod_stack);
                                sbuf_printf(sb, "\n");
                        }
                        if (data2 && data2 != data1) {
                                sbuf_printf(sb,
                    "Lock order \"%s\"(%s) -> \"%s\"(%s) first seen at:\n",
                                    tmp_w2->w_name, tmp_w2->w_class->lc_name,
                                    tmp_w1->w_name, tmp_w1->w_class->lc_name);
                                stack_sbuf_print(sb, &tmp_data2->wlod_stack);
                                sbuf_printf(sb, "\n");
                        }
                }
        }
        mtx_lock_spin(&w_mtx);
        if (generation != w_generation) {
                mtx_unlock_spin(&w_mtx);

                /*
                 * The graph changed while we were printing stack data,
                 * try again.
                 */
                req->oldidx = 0;
                sbuf_clear(sb);
                goto restart;
        }
        mtx_unlock_spin(&w_mtx);

        /* Free temporary storage space. */
        free(tmp_data1, M_TEMP);
        free(tmp_data2, M_TEMP);
        free(tmp_w1, M_TEMP);
        free(tmp_w2, M_TEMP);

        sbuf_finish(sb);
        error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
        sbuf_delete(sb);

        return (error);
}
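
/*
 * Per the format strings above, each record in the badstacks report has
 * roughly this shape (names are placeholders, not real output):
 *
 *	Lock order reversal between "foo"(sleep mutex) and "bar"(sleep mutex)!
 *	Lock order "foo"(sleep mutex) -> "bar"(sleep mutex) first seen at:
 *	<stack trace rendered by stack_sbuf_print()>
 */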
static int
sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS)
{
        struct witness *w;
        struct sbuf *sb;
        int error;

        if (witness_watch < 1) {
                error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
                return (error);
        }
        if (witness_cold) {
                error = SYSCTL_OUT(req, w_stillcold, sizeof(w_stillcold));
                return (error);
        }
        error = 0;
        sb = sbuf_new(NULL, NULL, FULLGRAPH_SBUF_SIZE, SBUF_FIXEDLEN);
        if (sb == NULL)
                return (ENOMEM);
        sbuf_printf(sb, "\n");

        mtx_lock_spin(&w_mtx);
        STAILQ_FOREACH(w, &w_all, w_list)
                w->w_displayed = 0;
        STAILQ_FOREACH(w, &w_all, w_list)
                witness_add_fullgraph(sb, w);
        mtx_unlock_spin(&w_mtx);

        /*
         * While using SBUF_FIXEDLEN, check if the sbuf overflowed.
         */
        if (sbuf_overflowed(sb)) {
                sbuf_delete(sb);
                panic("%s: sbuf overflowed, bump FULLGRAPH_SBUF_SIZE value\n",
                    __func__);
        }

        /*
         * Close the sbuf and return to userland.
         */
        sbuf_finish(sb);
        error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
        sbuf_delete(sb);

        return (error);
}
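
/*
 * The fullgraph output is a list of "parent","child" pairs, one known edge
 * per line (see witness_add_fullgraph() below), suitable for feeding into
 * external graph tools.  Shape only, with placeholder names:
 *
 *	"Giant","foo"
 *	"foo","bar"
 */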
static int
sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS)
{
        int error, value;

        value = witness_watch;
        error = sysctl_handle_int(oidp, &value, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (value > 1 || value < -1 ||
            (witness_watch == -1 && value != witness_watch))
                return (EINVAL);
        witness_watch = value;
        return (0);
}
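
/*
 * Assuming this handler is registered under the conventional
 * debug.witness.watch OID, it can be tuned from userland; note that once
 * witness is fully disabled (-1) it can never be re-enabled:
 *
 *	# sysctl debug.witness.watch
 *	debug.witness.watch: 1
 *	# sysctl debug.witness.watch=-1
 */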
static void
witness_add_fullgraph(struct sbuf *sb, struct witness *w)
{
        int i;

        if (w->w_displayed != 0 || (w->w_file == NULL && w->w_line == 0))
                return;
        w->w_displayed = 1;

        WITNESS_INDEX_ASSERT(w->w_index);
        for (i = 1; i <= w_max_used_index; i++) {
                if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
                        sbuf_printf(sb, "\"%s\",\"%s\"\n", w->w_name,
                            w_data[i].w_name);
                        witness_add_fullgraph(sb, &w_data[i]);
                }
        }
}
/*
 * A simple hash function.  Takes a key pointer and a key size.  If size == 0,
 * the key is interpreted as a string and read until the null terminator.
 * Otherwise, the first size bytes are read.  Returns an unsigned 32-bit hash
 * value computed from the key.
 */
static uint32_t
witness_hash_djb2(const uint8_t *key, uint32_t size)
{
        unsigned int hash = 5381;
        int i;

        /* hash = hash * 33 + key[i] */
        if (size)
                for (i = 0; i < size; i++)
                        hash = ((hash << 5) + hash) + (unsigned int)key[i];
        else
                for (i = 0; key[i] != 0; i++)
                        hash = ((hash << 5) + hash) + (unsigned int)key[i];

        return (hash);
}
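
/*
 * Worked example of the step above, hash = hash * 33 + c, for the string
 * "ab" (0x61, 0x62):
 *
 *	hash = 5381
 *	hash = 5381 * 33 + 0x61 = 177670
 *	hash = 177670 * 33 + 0x62 = 5863208
 */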
/*
 * Initializes the two witness hash tables.  Called exactly once from
 * witness_initialize().
 */
static void
witness_init_hash_tables(void)
{
        int i;

        MPASS(witness_cold);

        /* Initialize the hash tables. */
        for (i = 0; i < WITNESS_HASH_SIZE; i++)
                w_hash.wh_array[i] = NULL;

        w_hash.wh_size = WITNESS_HASH_SIZE;
        w_hash.wh_count = 0;

        /* Initialize the lock order data hash. */
        w_lofree = NULL;
        for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) {
                memset(&w_lodata[i], 0, sizeof(w_lodata[i]));
                w_lodata[i].wlod_next = w_lofree;
                w_lofree = &w_lodata[i];
        }
        w_lohash.wloh_size = WITNESS_LO_HASH_SIZE;
        w_lohash.wloh_count = 0;
        for (i = 0; i < WITNESS_LO_HASH_SIZE; i++)
                w_lohash.wloh_array[i] = NULL;
}
static struct witness *
witness_hash_get(const char *key)
{
        struct witness *w;
        uint32_t hash;

        MPASS(key != NULL);
        if (witness_cold == 0)
                mtx_assert(&w_mtx, MA_OWNED);
        hash = witness_hash_djb2((const uint8_t *)key, 0) % w_hash.wh_size;
        w = w_hash.wh_array[hash];
        while (w != NULL) {
                if (strcmp(w->w_name, key) == 0)
                        goto out;
                w = w->w_hash_next;
        }

out:
        return (w);
}
static void
witness_hash_put(struct witness *w)
{
        uint32_t hash;

        MPASS(w != NULL);
        MPASS(w->w_name != NULL);
        if (witness_cold == 0)
                mtx_assert(&w_mtx, MA_OWNED);
        KASSERT(witness_hash_get(w->w_name) == NULL,
            ("%s: trying to add a hash entry that already exists!", __func__));
        KASSERT(w->w_hash_next == NULL,
            ("%s: w->w_hash_next != NULL", __func__));

        hash = witness_hash_djb2((const uint8_t *)w->w_name, 0) %
            w_hash.wh_size;
        w->w_hash_next = w_hash.wh_array[hash];
        w_hash.wh_array[hash] = w;
        w_hash.wh_count++;
}
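
/*
 * Hedged sketch of the put/get round trip: collisions chain through
 * w_hash_next, so a name just added is always found again.  Illustrative
 * only; both calls require w_mtx once witness_cold is clear:
 */
#if 0
static void
example_hash_round_trip(struct witness *w)
{

        mtx_lock_spin(&w_mtx);
        witness_hash_put(w);
        MPASS(witness_hash_get(w->w_name) == w);
        mtx_unlock_spin(&w_mtx);
}
#endif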
static struct witness_lock_order_data *
witness_lock_order_get(struct witness *parent, struct witness *child)
{
        struct witness_lock_order_data *data = NULL;
        struct witness_lock_order_key key;
        unsigned int hash;

        MPASS(parent != NULL && child != NULL);
        key.from = parent->w_index;
        key.to = child->w_index;
        WITNESS_INDEX_ASSERT(key.from);
        WITNESS_INDEX_ASSERT(key.to);
        if ((w_rmatrix[parent->w_index][child->w_index]
            & WITNESS_LOCK_ORDER_KNOWN) == 0)
                goto out;

        hash = witness_hash_djb2((const uint8_t *)&key,
            sizeof(key)) % w_lohash.wloh_size;
        data = w_lohash.wloh_array[hash];
        while (data != NULL) {
                if (witness_lock_order_key_equal(&data->wlod_key, &key))
                        break;
                data = data->wlod_next;
        }

out:
        return (data);
}
/*
 * Verify that parent and child have a known relationship, are not the same,
 * and child is actually a child of parent.  This is done without w_mtx
 * to avoid contention in the common case.
 */
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{

        if (parent != child &&
            w_rmatrix[parent->w_index][child->w_index]
            & WITNESS_LOCK_ORDER_KNOWN &&
            isitmychild(parent, child))
                return (1);

        return (0);
}
static int
witness_lock_order_add(struct witness *parent, struct witness *child)
{
        struct witness_lock_order_data *data = NULL;
        struct witness_lock_order_key key;
        unsigned int hash;

        MPASS(parent != NULL && child != NULL);
        key.from = parent->w_index;
        key.to = child->w_index;
        WITNESS_INDEX_ASSERT(key.from);
        WITNESS_INDEX_ASSERT(key.to);
        if (w_rmatrix[parent->w_index][child->w_index]
            & WITNESS_LOCK_ORDER_KNOWN)
                return (1);

        hash = witness_hash_djb2((const uint8_t *)&key,
            sizeof(key)) % w_lohash.wloh_size;
        w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
        data = w_lofree;
        if (data == NULL)
                return (0);
        w_lofree = data->wlod_next;
        data->wlod_next = w_lohash.wloh_array[hash];
        data->wlod_key = key;
        w_lohash.wloh_array[hash] = data;
        w_lohash.wloh_count++;
        stack_zero(&data->wlod_stack);
        stack_save(&data->wlod_stack);
        return (1);
}
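
/*
 * Hedged sketch of the add/get round trip: after a successful add, the
 * saved stack for the (parent, child) order can be retrieved and printed.
 * Illustrative only; w_mtx must be held by the caller:
 */
#if 0
static void
example_order_round_trip(struct witness *parent, struct witness *child)
{
        struct witness_lock_order_data *data;

        if (witness_lock_order_add(parent, child) &&
            (data = witness_lock_order_get(parent, child)) != NULL)
                stack_print(&data->wlod_stack);
}
#endif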
/* Call this whenever the structure of the witness graph changes. */
static void
witness_increment_graph_generation(void)
{

        if (witness_cold == 0)
                mtx_assert(&w_mtx, MA_OWNED);
        w_generation++;
}
#ifdef KDB
static void
_witness_debugger(int cond, const char *msg)
{

        if (witness_trace && cond)
                kdb_backtrace();
        if (witness_kdb && cond)
                kdb_enter(KDB_WHY_WITNESS, msg);
}
#endif /* KDB */