/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * macros.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>
#include <sys/mutex.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>
static __int64_t mtx_contention_count;
static __int64_t mtx_collision_count;
static __int64_t mtx_wakeup_count;
SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
	    &mtx_contention_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
	    &mtx_collision_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
	    &mtx_wakeup_count, 0, "");
static void mtx_chain_link(mtx_t mtx);
static void mtx_delete_link(mtx_t mtx, mtx_link_t link);
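/*
 * Illustrative usage sketch (not part of the original file): taking and
 * releasing an exclusive mutex with the primitives defined below.  The
 * names example_modify and the "exmtx" wait message are hypothetical,
 * and real callers would normally go through the inline wrappers in
 * sys/mutex2.h rather than calling the underscored functions directly.
 */
#if 0
static void
example_modify(mtx_t mtx)
{
	/* blocks until granted; may tsleep() while waiting */
	_mtx_lock_ex_quick(mtx, "exmtx");
	/* ... exclusive section; the mutex may be held across blocking ... */
	_mtx_unlock(mtx);
}
#endif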
/*
 * Exclusive-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			/*
			 * Clearing MTX_EXLINK in lock causes us to loop until
			 * MTX_EXLINK is available.  However, to avoid
			 * unnecessary cpu cache traffic we poll instead.
			 *
			 * Setting MTX_EXLINK in nlock causes us to loop until
			 * we can acquire MTX_EXLINK.
			 *
			 * Also set MTX_EXWANTED coincident with EXLINK, if
			 * not already set.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock = lock | MTX_EXWANTED | MTX_EXLINK;
			++mycpu->gd_spinlocks_wr;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				/*
				 * Check for early abort
				 */
				if (link->state == MTX_LINK_ABORTED) {
					atomic_clear_int(&mtx->mtx_lock,
							 MTX_EXLINK);
					--mycpu->gd_spinlocks_wr;
					error = ENOLCK;
					if (mtx->mtx_link == NULL) {
						atomic_clear_int(&mtx->mtx_lock,
								 MTX_EXWANTED);
					}
					break;
				}

				/*
				 * Success.  Link in our structure then
				 * release EXLINK and sleep.
				 */
				link->owner = curthread;
				link->state = MTX_LINK_LINKED;
				if (mtx->mtx_link) {
					link->next = mtx->mtx_link;
					link->prev = link->next->prev;
					link->next->prev = link;
					link->prev->next = link;
				} else {
					link->next = link;
					link->prev = link;
					mtx->mtx_link = link;
				}
				tsleep_interlock(link, 0);
				atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
				--mycpu->gd_spinlocks_wr;

				error = tsleep(link, flags, ident, to);
				++mtx_contention_count;

				/*
				 * Normal unlink, we should own the exclusive
				 * lock now.
				 */
				if (link->state == MTX_LINK_LINKED)
					mtx_delete_link(mtx, link);
				if (link->state == MTX_LINK_ACQUIRED) {
					KKASSERT(mtx->mtx_owner == link->owner);
					error = 0;
					break;
				}

				/*
				 * Aborted lock (mtx_abort_ex called).
				 */
				if (link->state == MTX_LINK_ABORTED) {
					error = ENOLCK;
					break;
				}

				/*
				 * tsleep error, else retry.
				 */
				if (error)
					break;
			} else {
				--mycpu->gd_spinlocks_wr;
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
int
_mtx_lock_ex_link(mtx_t mtx, mtx_link_t link,
		  const char *ident, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, ident, flags, to));
}
int
_mtx_lock_ex(mtx_t mtx, const char *ident, int flags, int to)
{
	struct mtx_link link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, flags, to));
}
int
_mtx_lock_ex_quick(mtx_t mtx, const char *ident)
{
	struct mtx_link link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, 0, 0));
}
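/*
 * Illustrative sketch (not part of the original file): an interruptible,
 * time-limited exclusive acquisition.  PCATCH allows a signal to abort
 * the tsleep() and "hz" is the standard ticks-per-second global; the
 * name example_lock_timed is hypothetical.
 */
#if 0
static int
example_lock_timed(mtx_t mtx)
{
	int error;

	error = _mtx_lock_ex(mtx, "extim", PCATCH, hz / 10);
	if (error)
		return (error);		/* interrupted or timed out, not held */
	/* ... exclusive section ... */
	_mtx_unlock(mtx);
	return (0);
}
#endif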
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			nlock = lock | MTX_SHWANTED;
			tsleep_interlock(mtx, 0);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = tsleep(mtx, flags, ident, to);
				if (error)
					break;
				++mtx_contention_count;
				/* retry */
			} else {
				tsleep_remove(curthread);
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
int
_mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
{
	return (__mtx_lock_sh(mtx, ident, flags, to));
}
int
_mtx_lock_sh_quick(mtx_t mtx, const char *ident)
{
	return (__mtx_lock_sh(mtx, ident, 0, 0));
}
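/*
 * Illustrative sketch (not part of the original file): a reader taking
 * the mutex shared.  Any number of shared holders may run concurrently,
 * and blocked shared waiters receive a mass wakeup on release (see the
 * NOTE above __mtx_lock_sh).  The name example_read is hypothetical.
 */
#if 0
static void
example_read(mtx_t mtx)
{
	_mtx_lock_sh_quick(mtx, "exsh");
	/* ... read-only section, concurrent with other shared holders ... */
	_mtx_unlock(mtx);
}
#endif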
/*
 * Exclusive-lock a mutex by spinning (with an increasing backoff)
 * instead of sleeping.  Recursion is allowed.
 */
void
_mtx_spinlock_ex(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* contended, back off for a progressively longer spin */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
/*
 * Share-lock a mutex by spinning (with an increasing backoff) instead
 * of sleeping.
 */
void
_mtx_spinlock_sh(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* contended, back off for a progressively longer spin */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
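/*
 * Illustrative sketch (not part of the original file): the
 * _mtx_spinlock_*() variants never tsleep(); they spin with a growing
 * backoff, so they are usable where sleeping is not permitted.  The
 * release path shown assumes _mtx_unlock() is the appropriate
 * counterpart since it operates on the same lock word; the name
 * example_poll is hypothetical.
 */
#if 0
static void
example_poll(mtx_t mtx)
{
	_mtx_spinlock_ex(mtx);
	/* ... short critical section, no blocking ... */
	_mtx_unlock(mtx);
}
#endif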
/*
 * Attempt to exclusive-lock a mutex without blocking.  Recursion is
 * allowed.  Returns 0 on success, EAGAIN if the lock cannot be obtained.
 */
int
_mtx_lock_ex_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}
/*
 * Attempt to share-lock a mutex without blocking.  Returns 0 on success,
 * EAGAIN if the lock is held exclusively.
 */
int
_mtx_lock_sh_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}
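/*
 * Illustrative sketch (not part of the original file): non-blocking
 * acquisition with the try variants, which return nonzero instead of
 * sleeping when the lock cannot be granted immediately.  The name
 * example_try_update is hypothetical.
 */
#if 0
static int
example_try_update(mtx_t mtx)
{
	if (_mtx_lock_ex_try(mtx) != 0)
		return (EAGAIN);	/* busy; caller retries later */
	/* ... exclusive section ... */
	_mtx_unlock(mtx);
	return (0);
}
#endif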
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}
		KKASSERT(mtx->mtx_owner == curthread);
		nlock = lock & ~(MTX_EXCLUSIVE | MTX_SHWANTED);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
			if (lock & MTX_SHWANTED) {
				++mtx_wakeup_count;
				wakeup(mtx);
			}
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
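/*
 * Illustrative sketch (not part of the original file): build up state
 * under the exclusive lock, then downgrade in place so readers can
 * proceed without an unlock/relock window.  The name example_publish
 * is hypothetical.
 */
#if 0
static void
example_publish(mtx_t mtx)
{
	_mtx_lock_ex_quick(mtx, "expub");
	/* ... modify the protected structure ... */
	_mtx_downgrade(mtx);	/* now held shared; SHWANTED sleepers wake */
	/* ... continue reading alongside other shared holders ... */
	_mtx_unlock(mtx);
}
#endif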
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}
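/*
 * Illustrative sketch (not part of the original file): attempt an
 * in-place upgrade from shared to exclusive and, on failure, fall back
 * to dropping the shared lock and reacquiring exclusively.  State must
 * be re-validated in the fallback path because the lock was released
 * in between.  The name example_upgrade is hypothetical.
 */
#if 0
static void
example_upgrade(mtx_t mtx)
{
	_mtx_lock_sh_quick(mtx, "exupg");
	if (_mtx_upgrade_try(mtx) != 0) {
		_mtx_unlock(mtx);
		_mtx_lock_ex_quick(mtx, "exupg");
		/* ... re-check the protected state before modifying ... */
	}
	/* ... exclusive section ... */
	_mtx_unlock(mtx);
}
#endif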
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * Any release which makes the lock available when others want an exclusive
 * lock causes us to chain the owner to the next exclusive lock instead of
 * releasing the lock.
 */
void
_mtx_unlock(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		nlock = lock & ~(MTX_SHWANTED | MTX_EXLINK);

		if (nlock == 1) {
			/*
			 * Last release, shared lock, no exclusive waiters.
			 */
			nlock = lock & MTX_EXLINK;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else if (nlock == (MTX_EXCLUSIVE | 1)) {
			/*
			 * Last release, exclusive lock, no exclusive waiters.
			 * Wake up any shared waiters.
			 */
			mtx->mtx_owner = NULL;
			nlock = lock & MTX_EXLINK;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				if (lock & MTX_SHWANTED) {
					++mtx_wakeup_count;
					wakeup(mtx);
				}
				break;
			}
		} else if (nlock == (MTX_EXWANTED | 1)) {
			/*
			 * Last release, shared lock, with exclusive
			 * waiters.
			 *
			 * Wait for EXLINK to clear, then acquire it.
			 * We could use the cmpset for this but polling
			 * is better on the cpu caches.
			 *
			 * Acquire an exclusive lock leaving the lockcount
			 * set to 1, and get EXLINK for access to mtx_link.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock |= MTX_EXLINK | MTX_EXCLUSIVE;
			nlock |= (lock & MTX_SHWANTED);
			++mycpu->gd_spinlocks_wr;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx_chain_link(mtx);
				--mycpu->gd_spinlocks_wr;
				break;
			}
			--mycpu->gd_spinlocks_wr;
		} else if (nlock == (MTX_EXCLUSIVE | MTX_EXWANTED | 1)) {
			/*
			 * Last release, exclusive lock, with exclusive
			 * waiters.
			 *
			 * Leave the exclusive lock intact and the lockcount
			 * set to 1, and get EXLINK for access to mtx_link.
			 */
			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			/*lock &= ~MTX_EXLINK;*/
			nlock |= MTX_EXLINK;
			nlock |= (lock & MTX_SHWANTED);
			++mycpu->gd_spinlocks_wr;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx_chain_link(mtx);
				--mycpu->gd_spinlocks_wr;
				break;
			}
			--mycpu->gd_spinlocks_wr;
		} else {
			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
/*
 * Chain the exclusive lock to the next waiting exclusive requester.
 * Called with the lock held exclusively with a single ref count, and
 * also with MTX_EXLINK held.
 */
static void
mtx_chain_link(mtx_t mtx)
{
	mtx_link_t link;
	u_int	lock;
	u_int	nlock;
	u_int	clock;	/* bits we own and want to clear */

	/*
	 * Chain the exclusive lock to the next link.  The caller cleared
	 * SHWANTED so if there is no link we have to wake up any shared
	 * waiters.
	 */
	clock = MTX_EXLINK;
	if ((link = mtx->mtx_link) != NULL) {
		KKASSERT(link->state == MTX_LINK_LINKED);
		if (link->next == link) {
			mtx->mtx_link = NULL;
			clock |= MTX_EXWANTED;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_ACQUIRED;
		mtx->mtx_owner = link->owner;
	} else {
		/*
		 * Chain was empty, release the exclusive lock's last count
		 * as well as the bits shown.
		 */
		clock |= MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1;
	}

	/*
	 * We have to use cmpset here to deal with MTX_SHWANTED.  If
	 * we just clear the bits we can miss a wakeup or, worse,
	 * leave mtx_lock unlocked with MTX_SHWANTED still set.
	 */
	for (;;) {
		lock = mtx->mtx_lock;
		nlock = lock & ~clock;

		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
			if (link) {
				/*
				 * Wakeup new exclusive holder.  Leave
				 * SHWANTED intact for it to deal with.
				 */
				++mtx_wakeup_count;
				wakeup(link);
			} else if (lock & MTX_SHWANTED) {
				/*
				 * Signal any shared waiters (and we also
				 * clear SHWANTED).
				 */
				mtx->mtx_owner = NULL;
				++mtx_wakeup_count;
				wakeup(mtx);
			}
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static void
mtx_delete_link(mtx_t mtx, mtx_link_t link)
{
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_EXLINK.
	 *
	 * Do not use cmpxchg to wait for EXLINK to clear as this might
	 * result in too much cpu cache traffic.
	 */
	++mycpu->gd_spinlocks_wr;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_EXLINK) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		/* lock &= ~MTX_EXLINK; */
		nlock = lock | MTX_EXLINK;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
		++mtx_collision_count;
	}

	/*
	 * Delete the link and release EXLINK.
	 */
	if (link->state == MTX_LINK_LINKED) {
		if (link->next == link) {
			mtx->mtx_link = NULL;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_IDLE;
	}
	atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
	--mycpu->gd_spinlocks_wr;
}
/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the
 * mtx_link is initialized, including both before and after the call
 * to mtx_lock_ex_link().
 */
void
mtx_abort_ex_link(mtx_t mtx, mtx_link_t link)
{
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_EXLINK
	 */
	++mycpu->gd_spinlocks_wr;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_EXLINK) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		/* lock &= ~MTX_EXLINK; */
		nlock = lock | MTX_EXLINK;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
		++mtx_collision_count;
	}

	/*
	 * Do the abort.
	 */
	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED:
		/*
		 * de-link, mark aborted, and wakeup the thread.
		 */
		if (link->next == link) {
			mtx->mtx_link = NULL;
		} else {
			mtx->mtx_link = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		link->state = MTX_LINK_ABORTED;
		wakeup(link);
		break;
	case MTX_LINK_ACQUIRED:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
	--mycpu->gd_spinlocks_wr;
}
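/*
 * Illustrative sketch (not part of the original file): an abortable
 * exclusive wait.  The waiter supplies its own mtx_link so that another
 * thread can call mtx_abort_ex_link() on it, causing a blocked
 * _mtx_lock_ex_link() to return ENOLCK.  The names example_wait and
 * example_cancel are hypothetical.
 */
#if 0
static int
example_wait(mtx_t mtx, mtx_link_t link)
{
	int error;

	mtx_link_init(link);
	error = _mtx_lock_ex_link(mtx, link, "exabt", 0, 0);
	if (error)
		return (error);		/* ENOLCK if aborted; lock not held */
	/* ... exclusive section ... */
	_mtx_unlock(mtx);
	return (0);
}

static void
example_cancel(mtx_t mtx, mtx_link_t link)
{
	mtx_abort_ex_link(mtx, link);	/* wakes the waiter with ENOLCK */
}
#endif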