/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module holds the global variables and functions used to maintain
 * lock_object structures.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_mprof.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <machine/cpufunc.h>

CTASSERT(LOCK_CLASS_MAX == 15);

struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = {
	&lock_class_mtx_spin,
	&lock_class_mtx_sleep,
	&lock_class_sx,
	&lock_class_rm,
	&lock_class_rm_sleepable,
	&lock_class_rw,
	&lock_class_lockmgr,
};
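
/*
 * The index of a class in this table is the class index that lock_init()
 * stores into lo_flags (via LO_CLASSSHIFT, below); LOCK_CLASS() recovers
 * a lock's class by indexing back into this array.
 */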

void
lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
    const char *type, int flags)
{
	int i;

	/* Check for double-init and zero object. */
	KASSERT(flags & LO_NEW || !lock_initialized(lock),
	    ("lock \"%s\" %p already initialized", name, lock));

	/* Look up lock class to find its index. */
	for (i = 0; i < LOCK_CLASS_MAX; i++)
		if (lock_classes[i] == class) {
			lock->lo_flags = i << LO_CLASSSHIFT;
			break;
		}
	KASSERT(i < LOCK_CLASS_MAX, ("unknown lock class %p", class));

	/* Initialize the lock object. */
	lock->lo_name = name;
	lock->lo_flags |= flags | LO_INITIALIZED;
	LOCK_LOG_INIT(lock, 0);
	WITNESS_INIT(lock, (type != NULL) ? type : name);
}
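
/*
 * lock_init() is normally reached through a lock class constructor rather
 * than called directly.  As a rough sketch (compare mtx_init() in
 * kern_mutex.c), a mutex constructor ends up doing something like:
 *
 *	lock_init(&m->lock_object, &lock_class_mtx_sleep, name, type, flags);
 */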

void
lock_destroy(struct lock_object *lock)
{

	KASSERT(lock_initialized(lock), ("lock %p is not initialized", lock));
	WITNESS_DESTROY(lock);
	LOCK_LOG_DESTROY(lock, 0);
	lock->lo_flags &= ~LO_INITIALIZED;
}

/*
 * Adaptive-spin backoff: each call busy-waits for a pseudo-randomized,
 * capped number of iterations and tracks the accumulated spin count.
 */
void
lock_delay(struct lock_delay_arg *la)
{
	u_int i, delay, backoff, min, max;
	struct lock_delay_config *lc = la->config;

	/* Start at the configured initial delay and grow it on each call. */
	delay = la->delay;
	if (delay == 0)
		delay = lc->initial;
	else {
		delay += lc->step;
		max = lc->max;
		if (delay > max)
			delay = max;
	}

	/* Randomize the backoff within [min, delay) to avoid lock-step. */
	backoff = cpu_ticks() % delay;
	min = lc->min;
	if (backoff < min)
		backoff = min;
	for (i = 0; i < backoff; i++)
		cpu_spinwait();

	la->delay = delay;
	la->spin_cnt += backoff;
}
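
/*
 * Example (a minimal sketch, not code from this file): a lock primitive's
 * spin loop typically drives lock_delay() from a lock_delay_arg that was
 * initialized against the primitive's delay configuration, e.g.:
 *
 *	struct lock_delay_arg lda;
 *
 *	lock_delay_arg_init(&lda, &some_delay_config);
 *	while (!try_acquire(lock))	// try_acquire() is hypothetical
 *		lock_delay(&lda);
 */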

#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
	struct lock_object *lock;
	struct lock_class *class;

	if (!have_addr)
		return;
	lock = (struct lock_object *)addr;
	if (LO_CLASSINDEX(lock) > LOCK_CLASS_MAX) {
		db_printf("Unknown lock class: %d\n", LO_CLASSINDEX(lock));
		return;
	}
	class = LOCK_CLASS(lock);
	db_printf(" class: %s\n", class->lc_name);
	db_printf(" name: %s\n", lock->lo_name);
	class->lc_ddb_show(lock);
}
#endif
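
/*
 * From the ddb prompt the command above is invoked with a lock_object
 * address, e.g. "show lock <addr>"; it prints the class and name and then
 * defers to the class's lc_ddb_show() callback for class-specific state.
 */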

#ifdef LOCK_PROFILING

/*
 * One object per-thread for each lock the thread owns.  Tracks individual
 * lock instances.
 */
struct lock_profile_object {
	LIST_ENTRY(lock_profile_object) lpo_link;
	struct lock_object *lpo_obj;
	const char	*lpo_file;
	int		lpo_line;
	uint16_t	lpo_ref;
	uint16_t	lpo_cnt;
	uint64_t	lpo_acqtime;
	uint64_t	lpo_waittime;
	u_int		lpo_contest_locking;
};

/*
 * One lock_prof for each (file, line, lock object) triple.
 */
struct lock_prof {
	SLIST_ENTRY(lock_prof) link;
	struct lock_class *class;
	const char	*file;
	const char	*name;
	int		line;
	int		ticks;
	uintmax_t	cnt_wait_max;
	uintmax_t	cnt_max;
	uintmax_t	cnt_tot;
	uintmax_t	cnt_wait;
	uintmax_t	cnt_cur;
	uintmax_t	cnt_contest_locking;
};

SLIST_HEAD(lphead, lock_prof);

#define	LPROF_HASH_SIZE		4096
#define	LPROF_HASH_MASK		(LPROF_HASH_SIZE - 1)
#define	LPROF_CACHE_SIZE	4096

/*
 * Array of objects and profs for each type of object for each cpu.  Spinlocks
 * are handled separately because a thread may be preempted and acquire a
 * spinlock while in the lock profiling code of a non-spinlock.  In this way
 * we only need a critical section to protect the per-cpu lists.
 */
struct lock_prof_type {
	struct lphead		lpt_lpalloc;
	struct lpohead		lpt_lpoalloc;
	struct lphead		lpt_hash[LPROF_HASH_SIZE];
	struct lock_prof	lpt_prof[LPROF_CACHE_SIZE];
	struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE];
};

struct lock_prof_cpu {
	struct lock_prof_type	lpc_types[2]; /* One for spin one for other. */
};

struct lock_prof_cpu *lp_cpu[MAXCPU];

volatile int lock_prof_enable = 0;
static volatile int lock_prof_resetting;

#define	LPROF_SBUF_SIZE		256

static int lock_prof_rejected;
static int lock_prof_skipspin;
static int lock_prof_skipcount;

#ifndef USE_CPU_NANOSECONDS
uint64_t
nanoseconds(void)
{
	struct bintime bt;
	uint64_t ns;

	binuptime(&bt);
	/* From bintime2timespec */
	ns = bt.sec * (uint64_t)1000000000;
	ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32;
	return (ns);
}
#endif
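
/*
 * The frac conversion above is fixed-point arithmetic: bt.frac is a 64-bit
 * binary fraction of a second, so the exact result would be
 * frac * 10^9 / 2^64.  Keeping only the upper 32 bits of frac,
 * (10^9 * (frac >> 32)) >> 32 computes the same value to within a fraction
 * of a nanosecond while staying inside 64-bit multiplication.
 */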

static void
lock_prof_init_type(struct lock_prof_type *type)
{
	int i;

	SLIST_INIT(&type->lpt_lpalloc);
	LIST_INIT(&type->lpt_lpoalloc);
	for (i = 0; i < LPROF_CACHE_SIZE; i++) {
		SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i],
		    link);
		LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i],
		    lpo_link);
	}
}

static void
lock_prof_init(void *arg)
{
	int cpu;

	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]);
		lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]);
	}
}
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);

static void
lock_prof_reset_wait(void)
{

	/*
	 * Spin relinquishing our cpu so that quiesce_all_cpus may
	 * complete.
	 */
	while (lock_prof_resetting)
		sched_relinquish(curthread);
}

static void
lock_prof_reset(void)
{
	struct lock_prof_cpu *lpc;
	int enabled, i, cpu;

	/*
	 * We race not only with acquiring and releasing locks but also with
	 * thread exit.  To be certain that threads exit without valid head
	 * pointers, they must see resetting set before enabled is cleared.
	 * Otherwise a thread could observe profiling disabled, skip removing
	 * a lock from its per-thread list, and never wait for the reset
	 * below to remove it.
	 */
	atomic_store_rel_int(&lock_prof_resetting, 1);
	enabled = lock_prof_enable;
	lock_prof_enable = 0;
	quiesce_all_cpus("profreset", 0);
	/*
	 * Some objects may have migrated between CPUs.  Clear all links
	 * before we zero the structures.  Some items may still be linked
	 * into per-thread lists as well.
	 */
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		lpc = lp_cpu[cpu];
		for (i = 0; i < LPROF_CACHE_SIZE; i++) {
			LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link);
			LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link);
		}
	}
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		lpc = lp_cpu[cpu];
		bzero(lpc, sizeof(*lpc));
		lock_prof_init_type(&lpc->lpc_types[0]);
		lock_prof_init_type(&lpc->lpc_types[1]);
	}
	atomic_store_rel_int(&lock_prof_resetting, 0);
	lock_prof_enable = enabled;
}

static void
lock_prof_output(struct lock_prof *lp, struct sbuf *sb)
{
	const char *p;

	/* Strip leading "../" components from the file name. */
	for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3)
		continue;
	/* Times are collected in nanoseconds; / 1000 reports microseconds. */
	sbuf_printf(sb,
	    "%8ju %9ju %11ju %11ju %11ju %6ju %6ju %2ju %6ju %s:%d (%s:%s)\n",
	    lp->cnt_max / 1000, lp->cnt_wait_max / 1000, lp->cnt_tot / 1000,
	    lp->cnt_wait / 1000, lp->cnt_cur,
	    lp->cnt_cur == 0 ? (uintmax_t)0 :
	    lp->cnt_tot / (lp->cnt_cur * 1000),
	    lp->cnt_cur == 0 ? (uintmax_t)0 :
	    lp->cnt_wait / (lp->cnt_cur * 1000),
	    (uintmax_t)0, lp->cnt_contest_locking,
	    p, lp->line, lp->class->lc_name, lp->name);
}

static void
lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
    int spin, int t)
{
	struct lock_prof_type *type;
	struct lock_prof *l;
	int cpu;

	dst->file = match->file;
	dst->line = match->line;
	dst->class = match->class;
	dst->name = match->name;

	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (lp_cpu[cpu] == NULL)
			continue;
		type = &lp_cpu[cpu]->lpc_types[spin];
		SLIST_FOREACH(l, &type->lpt_hash[hash], link) {
			/* Skip entries already summed in this pass. */
			if (l->ticks == t)
				continue;
			if (l->file != match->file || l->line != match->line ||
			    l->name != match->name)
				continue;
			l->ticks = t;
			if (l->cnt_max > dst->cnt_max)
				dst->cnt_max = l->cnt_max;
			if (l->cnt_wait_max > dst->cnt_wait_max)
				dst->cnt_wait_max = l->cnt_wait_max;
			dst->cnt_tot += l->cnt_tot;
			dst->cnt_wait += l->cnt_wait;
			dst->cnt_cur += l->cnt_cur;
			dst->cnt_contest_locking += l->cnt_contest_locking;
		}
	}
}

static void
lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin,
    int t)
{
	struct lock_prof *l;
	int i;

	for (i = 0; i < LPROF_HASH_SIZE; ++i) {
		SLIST_FOREACH(l, &type->lpt_hash[i], link) {
			struct lock_prof lp = {};

			if (l->ticks == t)
				continue;
			lock_prof_sum(l, &lp, i, spin, t);
			lock_prof_output(&lp, sb);
		}
	}
}

static int
dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *sb;
	int error, cpu, t;
	int enabled;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sb = sbuf_new_for_sysctl(NULL, NULL, LPROF_SBUF_SIZE, req);
	sbuf_printf(sb, "\n%8s %9s %11s %11s %11s %6s %6s %2s %6s %s\n",
	    "max", "wait_max", "total", "wait_total", "count", "avg",
	    "wait_avg", "cnt_hold", "cnt_lock", "name");
	enabled = lock_prof_enable;
	lock_prof_enable = 0;
	quiesce_all_cpus("profstat", 0);
	t = ticks;
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (lp_cpu[cpu] == NULL)
			continue;
		lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t);
		lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t);
	}
	lock_prof_enable = enabled;

	error = sbuf_finish(sb);
	/* Output a trailing NUL. */
	if (error == 0)
		error = SYSCTL_OUT(req, "", 1);
	sbuf_delete(sb);
	return (error);
}

static int
enable_lock_prof(SYSCTL_HANDLER_ARGS)
{
	int error, v;

	v = lock_prof_enable;
	error = sysctl_handle_int(oidp, &v, v, req);
	if (error)
		return (error);
	if (req->newptr == NULL)
		return (error);
	if (v == lock_prof_enable)
		return (0);
	if (v == 1)
		lock_prof_reset();
	lock_prof_enable = !!v;

	return (0);
}

static int
reset_lock_prof_stats(SYSCTL_HANDLER_ARGS)
{
	int error, v;

	v = 0;
	error = sysctl_handle_int(oidp, &v, 0, req);
	if (error)
		return (error);
	if (req->newptr == NULL)
		return (error);
	if (v == 0)
		return (0);
	lock_prof_reset();

	return (0);
}

static struct lock_prof *
lock_profile_lookup(struct lock_object *lo, int spin, const char *file,
    int line)
{
	const char *unknown = "(unknown)";
	struct lock_prof_type *type;
	struct lock_prof *lp;
	struct lphead *head;
	const char *p;
	u_int hash;

	p = file;
	if (p == NULL || *p == '\0')
		p = unknown;
	hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line;
	hash &= LPROF_HASH_MASK;
	type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
	head = &type->lpt_hash[hash];
	SLIST_FOREACH(lp, head, link) {
		if (lp->line == line && lp->file == p &&
		    lp->name == lo->lo_name)
			return (lp);
	}
	lp = SLIST_FIRST(&type->lpt_lpalloc);
	if (lp == NULL) {
		lock_prof_rejected++;
		return (lp);
	}
	SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link);
	lp->file = p;
	lp->line = line;
	lp->class = LOCK_CLASS(lo);
	lp->name = lo->lo_name;
	SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link);
	return (lp);
}
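
/*
 * Note that the hash and the equality test above compare the file and name
 * string pointers, not their contents: a given acquisition site always
 * passes the same __FILE__ and lo_name pointers, so pointer identity is
 * sufficient and keeps the lookup cheap.
 */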

static struct lock_profile_object *
lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file,
    int line)
{
	struct lock_profile_object *l;
	struct lock_prof_type *type;
	struct lpohead *head;

	head = &curthread->td_lprof[spin];
	LIST_FOREACH(l, head, lpo_link)
		if (l->lpo_obj == lo && l->lpo_file == file &&
		    l->lpo_line == line)
			return (l);
	type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
	l = LIST_FIRST(&type->lpt_lpoalloc);
	if (l == NULL) {
		lock_prof_rejected++;
		return (NULL);
	}
	LIST_REMOVE(l, lpo_link);
	l->lpo_obj = lo;
	l->lpo_file = file;
	l->lpo_line = line;
	l->lpo_cnt = 0;
	LIST_INSERT_HEAD(head, l, lpo_link);

	return (l);
}

void
lock_profile_obtain_lock_success(struct lock_object *lo, int contested,
    uint64_t waittime, const char *file, int line)
{
	static int lock_prof_count;
	struct lock_profile_object *l;
	int spin;

	if (SCHEDULER_STOPPED())
		return;

	/* Don't reset the timer when/if recursing. */
	if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE))
		return;
	if (lock_prof_skipcount &&
	    (++lock_prof_count % lock_prof_skipcount) != 0)
		return;
	spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0;
	if (spin && lock_prof_skipspin == 1)
		return;
	critical_enter();
	/* Recheck enabled now that we're in a critical section. */
	if (lock_prof_enable == 0)
		goto out;
	l = lock_profile_object_lookup(lo, spin, file, line);
	if (l == NULL)
		goto out;
	l->lpo_cnt++;
	if (++l->lpo_ref > 1)
		goto out;
	l->lpo_contest_locking = contested;
	l->lpo_acqtime = nanoseconds();
	if (waittime && (l->lpo_acqtime > waittime))
		l->lpo_waittime = l->lpo_acqtime - waittime;
	else
		l->lpo_waittime = 0;
out:
	critical_exit();
}

void
lock_profile_thread_exit(struct thread *td)
{
#ifdef INVARIANTS
	struct lock_profile_object *l;

	MPASS(curthread->td_critnest == 0);
#endif
	/*
	 * If lock profiling was disabled we have to wait for reset to
	 * clear our pointers before we can exit safely.
	 */
	lock_prof_reset_wait();
#ifdef INVARIANTS
	LIST_FOREACH(l, &td->td_lprof[0], lpo_link)
		printf("thread still holds lock acquired at %s:%d\n",
		    l->lpo_file, l->lpo_line);
	LIST_FOREACH(l, &td->td_lprof[1], lpo_link)
		printf("thread still holds lock acquired at %s:%d\n",
		    l->lpo_file, l->lpo_line);
#endif
	MPASS(LIST_FIRST(&td->td_lprof[0]) == NULL);
	MPASS(LIST_FIRST(&td->td_lprof[1]) == NULL);
}

void
lock_profile_release_lock(struct lock_object *lo)
{
	struct lock_profile_object *l;
	struct lock_prof_type *type;
	struct lock_prof *lp;
	uint64_t curtime, holdtime;
	struct lpohead *head;
	int spin;

	if (SCHEDULER_STOPPED())
		return;
	if (lo->lo_flags & LO_NOPROFILE)
		return;
	spin = (LOCK_CLASS(lo)->lc_flags & LC_SPINLOCK) ? 1 : 0;
	head = &curthread->td_lprof[spin];
	if (LIST_FIRST(head) == NULL)
		return;
	critical_enter();
	/* Recheck enabled now that we're in a critical section. */
	if (lock_prof_enable == 0 && lock_prof_resetting == 1)
		goto out;
	/*
	 * If lock profiling is not enabled we still want to remove the
	 * lpo from our queue.
	 */
	LIST_FOREACH(l, head, lpo_link)
		if (l->lpo_obj == lo)
			break;
	if (l == NULL)
		goto out;
	if (--l->lpo_ref > 0)
		goto out;
	lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line);
	if (lp == NULL)
		goto release;
	curtime = nanoseconds();
	if (curtime < l->lpo_acqtime)
		goto release;
	holdtime = curtime - l->lpo_acqtime;

	/*
	 * Record if the lock has been held longer now than ever
	 * before.
	 */
	if (holdtime > lp->cnt_max)
		lp->cnt_max = holdtime;
	if (l->lpo_waittime > lp->cnt_wait_max)
		lp->cnt_wait_max = l->lpo_waittime;
	lp->cnt_tot += holdtime;
	lp->cnt_wait += l->lpo_waittime;
	lp->cnt_contest_locking += l->lpo_contest_locking;
	lp->cnt_cur += l->lpo_cnt;
release:
	LIST_REMOVE(l, lpo_link);
	type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
	LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link);
out:
	critical_exit();
}

static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging");
static SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL,
    "lock profiling");
SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW,
    &lock_prof_skipspin, 0, "Skip profiling on spinlocks.");
SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW,
    &lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions.");
SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD,
    &lock_prof_rejected, 0, "Number of rejected profiling records");
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics");
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics");
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, enable_lock_prof, "I", "Enable lock profiling");

#endif /* LOCK_PROFILING */