2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * Implement fast persistent locks based on atomic_cmpset_int() with
36 * semantics similar to lockmgr locks but faster and taking up much less
37 * space. Taken from HAMMER's lock implementation.
39 * These are meant to complement our LWKT tokens. Tokens are only held
40 * while the thread is running. Mutexes can be held across blocking
43 * Most of the support is in sys/mutex[2].h. We mostly provide backoff
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/sysctl.h>
51 #include <sys/thread.h>
53 #include <machine/cpufunc.h>
55 #include <sys/thread2.h>
56 #include <sys/mutex2.h>
58 static __int64_t mtx_contention_count
;
59 static __int64_t mtx_collision_count
;
60 static __int64_t mtx_wakeup_count
;
62 SYSCTL_QUAD(_kern
, OID_AUTO
, mtx_contention_count
, CTLFLAG_RW
,
63 &mtx_contention_count
, 0, "");
64 SYSCTL_QUAD(_kern
, OID_AUTO
, mtx_collision_count
, CTLFLAG_RW
,
65 &mtx_collision_count
, 0, "");
66 SYSCTL_QUAD(_kern
, OID_AUTO
, mtx_wakeup_count
, CTLFLAG_RW
,
67 &mtx_wakeup_count
, 0, "");
69 static void mtx_chain_link(mtx_t mtx
);
70 static void mtx_delete_link(mtx_t mtx
, mtx_link_t link
);
73 * Exclusive-lock a mutex, block until acquired. Recursion is allowed.
75 * Returns 0 on success, or the tsleep() return code on failure.
76 * An error can only be returned if PCATCH is specified in the flags.
79 __mtx_lock_ex(mtx_t mtx
, mtx_link_t link
, const char *ident
, int flags
, int to
)
88 nlock
= MTX_EXCLUSIVE
| 1;
89 if (atomic_cmpset_int(&mtx
->mtx_lock
, 0, nlock
)) {
90 mtx
->mtx_owner
= curthread
;
94 } else if ((lock
& MTX_EXCLUSIVE
) &&
95 mtx
->mtx_owner
== curthread
) {
96 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
98 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
104 * Clearing MTX_EXLINK in lock causes us to loop until
105 * MTX_EXLINK is available. However, to avoid
106 * unnecessary cpu cache traffic we poll instead.
108 * Setting MTX_EXLINK in nlock causes us to loop until
109 * we can acquire MTX_EXLINK.
111 * Also set MTX_EXWANTED coincident with EXLINK, if
116 if (lock
& MTX_EXLINK
) {
118 ++mtx_collision_count
;
122 /*lock &= ~MTX_EXLINK;*/
123 nlock
= lock
| MTX_EXWANTED
| MTX_EXLINK
;
125 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
127 * Check for early abort
129 if (link
->state
== MTX_LINK_ABORTED
) {
130 atomic_clear_int(&mtx
->mtx_lock
,
134 if (mtx
->mtx_link
== NULL
) {
135 atomic_clear_int(&mtx
->mtx_lock
,
142 * Success. Link in our structure then
143 * release EXLINK and sleep.
146 link
->state
= MTX_LINK_LINKED
;
148 link
->next
= mtx
->mtx_link
;
149 link
->prev
= link
->next
->prev
;
150 link
->next
->prev
= link
;
151 link
->prev
->next
= link
;
155 mtx
->mtx_link
= link
;
157 tsleep_interlock(link
, 0);
158 atomic_clear_int(&mtx
->mtx_lock
, MTX_EXLINK
);
161 error
= tsleep(link
, flags
, ident
, to
);
162 ++mtx_contention_count
;
165 * Normal unlink, we should own the exclusive
168 if (link
->state
== MTX_LINK_LINKED
)
169 mtx_delete_link(mtx
, link
);
170 if (link
->state
== MTX_LINK_ACQUIRED
) {
171 KKASSERT(mtx
->mtx_owner
== link
->owner
);
177 * Aborted lock (mtx_abort_ex called).
179 if (link
->state
== MTX_LINK_ABORTED
) {
185 * tsleep error, else retry.
193 ++mtx_collision_count
;
199 _mtx_lock_ex_link(mtx_t mtx
, mtx_link_t link
,
200 const char *ident
, int flags
, int to
)
202 return(__mtx_lock_ex(mtx
, link
, ident
, flags
, to
));
206 _mtx_lock_ex(mtx_t mtx
, const char *ident
, int flags
, int to
)
208 struct mtx_link link
;
210 mtx_link_init(&link
);
211 return(__mtx_lock_ex(mtx
, &link
, ident
, flags
, to
));
215 _mtx_lock_ex_quick(mtx_t mtx
, const char *ident
)
217 struct mtx_link link
;
219 mtx_link_init(&link
);
220 return(__mtx_lock_ex(mtx
, &link
, ident
, 0, 0));
224 * Share-lock a mutex, block until acquired. Recursion is allowed.
226 * Returns 0 on success, or the tsleep() return code on failure.
227 * An error can only be returned if PCATCH is specified in the flags.
229 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
230 * do not have to chain the wakeup().
233 __mtx_lock_sh(mtx_t mtx
, const char *ident
, int flags
, int to
)
240 lock
= mtx
->mtx_lock
;
241 if ((lock
& MTX_EXCLUSIVE
) == 0) {
242 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
244 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
249 nlock
= lock
| MTX_SHWANTED
;
250 tsleep_interlock(mtx
, 0);
251 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
252 error
= tsleep(mtx
, flags
, ident
, to
);
255 ++mtx_contention_count
;
258 tsleep_remove(curthread
);
261 ++mtx_collision_count
;
267 _mtx_lock_sh(mtx_t mtx
, const char *ident
, int flags
, int to
)
269 return (__mtx_lock_sh(mtx
, ident
, flags
, to
));
273 _mtx_lock_sh_quick(mtx_t mtx
, const char *ident
)
275 return (__mtx_lock_sh(mtx
, ident
, 0, 0));
279 * Get an exclusive spinlock the hard way.
282 _mtx_spinlock(mtx_t mtx
)
290 lock
= mtx
->mtx_lock
;
292 nlock
= MTX_EXCLUSIVE
| 1;
293 if (atomic_cmpset_int(&mtx
->mtx_lock
, 0, nlock
)) {
294 mtx
->mtx_owner
= curthread
;
297 } else if ((lock
& MTX_EXCLUSIVE
) &&
298 mtx
->mtx_owner
== curthread
) {
299 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
301 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
308 for (bo
= 0; bo
< bb
; ++bo
)
310 ++mtx_contention_count
;
313 ++mtx_collision_count
;
318 * Attempt to acquire a spinlock, if we fail we must undo the
319 * gd->gd_spinlocks_wr/gd->gd_curthead->td_critcount predisposition.
321 * Returns 0 on success, EAGAIN on failure.
324 _mtx_spinlock_try(mtx_t mtx
)
326 globaldata_t gd
= mycpu
;
332 lock
= mtx
->mtx_lock
;
334 nlock
= MTX_EXCLUSIVE
| 1;
335 if (atomic_cmpset_int(&mtx
->mtx_lock
, 0, nlock
)) {
336 mtx
->mtx_owner
= gd
->gd_curthread
;
339 } else if ((lock
& MTX_EXCLUSIVE
) &&
340 mtx
->mtx_owner
== gd
->gd_curthread
) {
341 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
343 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
346 --gd
->gd_spinlocks_wr
;
348 --gd
->gd_curthread
->td_critcount
;
353 ++mtx_collision_count
;
361 _mtx_spinlock_sh(mtx_t mtx
)
369 lock
= mtx
->mtx_lock
;
370 if ((lock
& MTX_EXCLUSIVE
) == 0) {
371 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
373 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
380 for (bo
= 0; bo
< bb
; ++bo
)
382 ++mtx_contention_count
;
385 ++mtx_collision_count
;
392 _mtx_lock_ex_try(mtx_t mtx
)
399 lock
= mtx
->mtx_lock
;
401 nlock
= MTX_EXCLUSIVE
| 1;
402 if (atomic_cmpset_int(&mtx
->mtx_lock
, 0, nlock
)) {
403 mtx
->mtx_owner
= curthread
;
406 } else if ((lock
& MTX_EXCLUSIVE
) &&
407 mtx
->mtx_owner
== curthread
) {
408 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
410 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
417 ++mtx_collision_count
;
423 _mtx_lock_sh_try(mtx_t mtx
)
430 lock
= mtx
->mtx_lock
;
431 if ((lock
& MTX_EXCLUSIVE
) == 0) {
432 KKASSERT((lock
& MTX_MASK
) != MTX_MASK
);
434 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
441 ++mtx_collision_count
;
447 * If the lock is held exclusively it must be owned by the caller. If the
448 * lock is already a shared lock this operation is a NOP. A panic will
449 * occur if the lock is not held either shared or exclusive.
451 * The exclusive count is converted to a shared count.
454 _mtx_downgrade(mtx_t mtx
)
460 lock
= mtx
->mtx_lock
;
461 if ((lock
& MTX_EXCLUSIVE
) == 0) {
462 KKASSERT((lock
& MTX_MASK
) > 0);
465 KKASSERT(mtx
->mtx_owner
== curthread
);
466 nlock
= lock
& ~(MTX_EXCLUSIVE
| MTX_SHWANTED
);
467 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
468 if (lock
& MTX_SHWANTED
) {
475 ++mtx_collision_count
;
480 * Upgrade a shared lock to an exclusive lock. The upgrade will fail if
481 * the shared lock has a count other then 1. Optimize the most likely case
482 * but note that a single cmpset can fail due to WANTED races.
484 * If the lock is held exclusively it must be owned by the caller and
485 * this function will simply return without doing anything. A panic will
486 * occur if the lock is held exclusively by someone other then the caller.
488 * Returns 0 on success, EDEADLK on failure.
491 _mtx_upgrade_try(mtx_t mtx
)
498 lock
= mtx
->mtx_lock
;
500 if ((lock
& ~MTX_EXWANTED
) == 1) {
501 nlock
= lock
| MTX_EXCLUSIVE
;
502 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
503 mtx
->mtx_owner
= curthread
;
506 } else if (lock
& MTX_EXCLUSIVE
) {
507 KKASSERT(mtx
->mtx_owner
== curthread
);
514 ++mtx_collision_count
;
520 * Unlock a lock. The caller must hold the lock either shared or exclusive.
522 * Any release which makes the lock available when others want an exclusive
523 * lock causes us to chain the owner to the next exclusive lock instead of
524 * releasing the lock.
527 _mtx_unlock(mtx_t mtx
)
533 lock
= mtx
->mtx_lock
;
534 nlock
= lock
& ~(MTX_SHWANTED
| MTX_EXLINK
);
538 * Last release, shared lock, no exclusive waiters.
540 nlock
= lock
& MTX_EXLINK
;
541 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
543 } else if (nlock
== (MTX_EXCLUSIVE
| 1)) {
545 * Last release, exclusive lock, no exclusive waiters.
546 * Wake up any shared waiters.
548 mtx
->mtx_owner
= NULL
;
549 nlock
= lock
& MTX_EXLINK
;
550 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
551 if (lock
& MTX_SHWANTED
) {
557 } else if (nlock
== (MTX_EXWANTED
| 1)) {
559 * Last release, shared lock, with exclusive
562 * Wait for EXLINK to clear, then acquire it.
563 * We could use the cmpset for this but polling
564 * is better on the cpu caches.
566 * Acquire an exclusive lock leaving the lockcount
567 * set to 1, and get EXLINK for access to mtx_link.
571 if (lock
& MTX_EXLINK
) {
573 ++mtx_collision_count
;
577 /*lock &= ~MTX_EXLINK;*/
578 nlock
|= MTX_EXLINK
| MTX_EXCLUSIVE
;
579 nlock
|= (lock
& MTX_SHWANTED
);
581 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
587 } else if (nlock
== (MTX_EXCLUSIVE
| MTX_EXWANTED
| 1)) {
589 * Last release, exclusive lock, with exclusive
592 * leave the exclusive lock intact and the lockcount
593 * set to 1, and get EXLINK for access to mtx_link.
597 if (lock
& MTX_EXLINK
) {
599 ++mtx_collision_count
;
603 /*lock &= ~MTX_EXLINK;*/
605 nlock
|= (lock
& MTX_SHWANTED
);
607 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
615 * Not the last release (shared or exclusive)
618 KKASSERT((nlock
& MTX_MASK
) != MTX_MASK
);
619 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
623 ++mtx_collision_count
;
628 * Chain mtx_chain_link. Called with the lock held exclusively with a
629 * single ref count, and also with MTX_EXLINK held.
632 mtx_chain_link(mtx_t mtx
)
637 u_int clock
; /* bits we own and want to clear */
640 * Chain the exclusive lock to the next link. The caller cleared
641 * SHWANTED so if there is no link we have to wake up any shared
645 if ((link
= mtx
->mtx_link
) != NULL
) {
646 KKASSERT(link
->state
== MTX_LINK_LINKED
);
647 if (link
->next
== link
) {
648 mtx
->mtx_link
= NULL
;
649 clock
|= MTX_EXWANTED
;
651 mtx
->mtx_link
= link
->next
;
652 link
->next
->prev
= link
->prev
;
653 link
->prev
->next
= link
->next
;
655 link
->state
= MTX_LINK_ACQUIRED
;
656 mtx
->mtx_owner
= link
->owner
;
659 * Chain was empty, release the exclusive lock's last count
660 * as well the bits shown.
662 clock
|= MTX_EXCLUSIVE
| MTX_EXWANTED
| MTX_SHWANTED
| 1;
666 * We have to uset cmpset here to deal with MTX_SHWANTED. If
667 * we just clear the bits we can miss a wakeup or, worse,
668 * leave mtx_lock unlocked with MTX_SHWANTED still set.
671 lock
= mtx
->mtx_lock
;
672 nlock
= lock
& ~clock
;
674 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
)) {
677 * Wakeup new exclusive holder. Leave
681 } else if (lock
& MTX_SHWANTED
) {
683 * Signal any shared waiters (and we also
686 mtx
->mtx_owner
= NULL
;
693 ++mtx_collision_count
;
698 * Delete a link structure after tsleep has failed. This code is not
699 * in the critical path as most exclusive waits are chained.
703 mtx_delete_link(mtx_t mtx
, mtx_link_t link
)
705 thread_t td
= curthread
;
710 * Acquire MTX_EXLINK.
712 * Do not use cmpxchg to wait for EXLINK to clear as this might
713 * result in too much cpu cache traffic.
717 lock
= mtx
->mtx_lock
;
718 if (lock
& MTX_EXLINK
) {
720 ++mtx_collision_count
;
723 /* lock &= ~MTX_EXLINK; */
724 nlock
= lock
| MTX_EXLINK
;
725 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
728 ++mtx_collision_count
;
732 * Delete the link and release EXLINK.
734 if (link
->state
== MTX_LINK_LINKED
) {
735 if (link
->next
== link
) {
736 mtx
->mtx_link
= NULL
;
738 mtx
->mtx_link
= link
->next
;
739 link
->next
->prev
= link
->prev
;
740 link
->prev
->next
= link
->next
;
742 link
->state
= MTX_LINK_IDLE
;
744 atomic_clear_int(&mtx
->mtx_lock
, MTX_EXLINK
);
749 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
750 * return ENOLCK. This may be called at any time after the
751 * mtx_link is initialized, including both before and after the call
752 * to mtx_lock_ex_link().
755 mtx_abort_ex_link(mtx_t mtx
, mtx_link_t link
)
757 thread_t td
= curthread
;
766 lock
= mtx
->mtx_lock
;
767 if (lock
& MTX_EXLINK
) {
769 ++mtx_collision_count
;
772 /* lock &= ~MTX_EXLINK; */
773 nlock
= lock
| MTX_EXLINK
;
774 if (atomic_cmpset_int(&mtx
->mtx_lock
, lock
, nlock
))
777 ++mtx_collision_count
;
783 switch(link
->state
) {
786 * Link not started yet
788 link
->state
= MTX_LINK_ABORTED
;
790 case MTX_LINK_LINKED
:
792 * de-link, mark aborted, and wakeup the thread.
794 if (link
->next
== link
) {
795 mtx
->mtx_link
= NULL
;
797 mtx
->mtx_link
= link
->next
;
798 link
->next
->prev
= link
->prev
;
799 link
->prev
->next
= link
->next
;
801 link
->state
= MTX_LINK_ABORTED
;
804 case MTX_LINK_ACQUIRED
:
806 * Too late, the lock was acquired. Let it complete.
811 * link already aborted, do nothing.
815 atomic_clear_int(&mtx
->mtx_lock
, MTX_EXLINK
);