kernel - revamp mtx_spinlock()
[dragonfly.git] / sys / kern / kern_mutex.c
blobf58b98ebe480cdc2d48c83c0bc81f848622f3066
1 /*
2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 * Implement fast persistent locks based on atomic_cmpset_int() with
36 * semantics similar to lockmgr locks but faster and taking up much less
37 * space. Taken from HAMMER's lock implementation.
39 * These are meant to complement our LWKT tokens. Tokens are only held
40 * while the thread is running. Mutexes can be held across blocking
41 * conditions.
43 * Most of the support is in sys/mutex[2].h. We mostly provide backoff
44 * functions here.
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/sysctl.h>
51 #include <sys/thread.h>
53 #include <machine/cpufunc.h>
55 #include <sys/thread2.h>
56 #include <sys/mutex2.h>
58 static __int64_t mtx_contention_count;
59 static __int64_t mtx_collision_count;
60 static __int64_t mtx_wakeup_count;
62 SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
63 &mtx_contention_count, 0, "");
64 SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
65 &mtx_collision_count, 0, "");
66 SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
67 &mtx_wakeup_count, 0, "");
69 static void mtx_chain_link(mtx_t mtx);
70 static void mtx_delete_link(mtx_t mtx, mtx_link_t link);
73 * Exclusive-lock a mutex, block until acquired. Recursion is allowed.
75 * Returns 0 on success, or the tsleep() return code on failure.
76 * An error can only be returned if PCATCH is specified in the flags.
78 static __inline int
79 __mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to)
81 u_int lock;
82 u_int nlock;
83 int error;
85 for (;;) {
86 lock = mtx->mtx_lock;
87 if (lock == 0) {
88 nlock = MTX_EXCLUSIVE | 1;
89 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
90 mtx->mtx_owner = curthread;
91 error = 0;
92 break;
94 } else if ((lock & MTX_EXCLUSIVE) &&
95 mtx->mtx_owner == curthread) {
96 KKASSERT((lock & MTX_MASK) != MTX_MASK);
97 nlock = lock + 1;
98 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
99 error = 0;
100 break;
102 } else {
104 * Clearing MTX_EXLINK in lock causes us to loop until
105 * MTX_EXLINK is available. However, to avoid
106 * unnecessary cpu cache traffic we poll instead.
108 * Setting MTX_EXLINK in nlock causes us to loop until
109 * we can acquire MTX_EXLINK.
111 * Also set MTX_EXWANTED coincident with EXLINK, if
112 * not already set.
114 thread_t td;
116 if (lock & MTX_EXLINK) {
117 cpu_pause();
118 ++mtx_collision_count;
119 continue;
121 td = curthread;
122 /*lock &= ~MTX_EXLINK;*/
123 nlock = lock | MTX_EXWANTED | MTX_EXLINK;
124 ++td->td_critcount;
125 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
127 * Check for early abort
129 if (link->state == MTX_LINK_ABORTED) {
130 atomic_clear_int(&mtx->mtx_lock,
131 MTX_EXLINK);
132 --td->td_critcount;
133 error = ENOLCK;
134 if (mtx->mtx_link == NULL) {
135 atomic_clear_int(&mtx->mtx_lock,
136 MTX_EXWANTED);
138 break;
142 * Success. Link in our structure then
143 * release EXLINK and sleep.
145 link->owner = td;
146 link->state = MTX_LINK_LINKED;
147 if (mtx->mtx_link) {
148 link->next = mtx->mtx_link;
149 link->prev = link->next->prev;
150 link->next->prev = link;
151 link->prev->next = link;
152 } else {
153 link->next = link;
154 link->prev = link;
155 mtx->mtx_link = link;
157 tsleep_interlock(link, 0);
158 atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
159 --td->td_critcount;
161 error = tsleep(link, flags, ident, to);
162 ++mtx_contention_count;
165 * Normal unlink, we should own the exclusive
166 * lock now.
168 if (link->state == MTX_LINK_LINKED)
169 mtx_delete_link(mtx, link);
170 if (link->state == MTX_LINK_ACQUIRED) {
171 KKASSERT(mtx->mtx_owner == link->owner);
172 error = 0;
173 break;
177 * Aborted lock (mtx_abort_ex called).
179 if (link->state == MTX_LINK_ABORTED) {
180 error = ENOLCK;
181 break;
185 * tsleep error, else retry.
187 if (error)
188 break;
189 } else {
190 --td->td_critcount;
193 ++mtx_collision_count;
195 return (error);
199 _mtx_lock_ex_link(mtx_t mtx, mtx_link_t link,
200 const char *ident, int flags, int to)
202 return(__mtx_lock_ex(mtx, link, ident, flags, to));
206 _mtx_lock_ex(mtx_t mtx, const char *ident, int flags, int to)
208 struct mtx_link link;
210 mtx_link_init(&link);
211 return(__mtx_lock_ex(mtx, &link, ident, flags, to));
215 _mtx_lock_ex_quick(mtx_t mtx, const char *ident)
217 struct mtx_link link;
219 mtx_link_init(&link);
220 return(__mtx_lock_ex(mtx, &link, ident, 0, 0));
224 * Share-lock a mutex, block until acquired. Recursion is allowed.
226 * Returns 0 on success, or the tsleep() return code on failure.
227 * An error can only be returned if PCATCH is specified in the flags.
229 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
230 * do not have to chain the wakeup().
232 static __inline int
233 __mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
235 u_int lock;
236 u_int nlock;
237 int error;
239 for (;;) {
240 lock = mtx->mtx_lock;
241 if ((lock & MTX_EXCLUSIVE) == 0) {
242 KKASSERT((lock & MTX_MASK) != MTX_MASK);
243 nlock = lock + 1;
244 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
245 error = 0;
246 break;
248 } else {
249 nlock = lock | MTX_SHWANTED;
250 tsleep_interlock(mtx, 0);
251 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
252 error = tsleep(mtx, flags, ident, to);
253 if (error)
254 break;
255 ++mtx_contention_count;
256 /* retry */
257 } else {
258 tsleep_remove(curthread);
261 ++mtx_collision_count;
263 return (error);
267 _mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
269 return (__mtx_lock_sh(mtx, ident, flags, to));
273 _mtx_lock_sh_quick(mtx_t mtx, const char *ident)
275 return (__mtx_lock_sh(mtx, ident, 0, 0));
279 * Get an exclusive spinlock the hard way.
281 void
282 _mtx_spinlock(mtx_t mtx)
284 u_int lock;
285 u_int nlock;
286 int bb = 1;
287 int bo;
289 for (;;) {
290 lock = mtx->mtx_lock;
291 if (lock == 0) {
292 nlock = MTX_EXCLUSIVE | 1;
293 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
294 mtx->mtx_owner = curthread;
295 break;
297 } else if ((lock & MTX_EXCLUSIVE) &&
298 mtx->mtx_owner == curthread) {
299 KKASSERT((lock & MTX_MASK) != MTX_MASK);
300 nlock = lock + 1;
301 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
302 break;
303 } else {
304 /* MWAIT here */
305 if (bb < 1000)
306 ++bb;
307 cpu_pause();
308 for (bo = 0; bo < bb; ++bo)
310 ++mtx_contention_count;
312 cpu_pause();
313 ++mtx_collision_count;
318 * Attempt to acquire a spinlock, if we fail we must undo the
319 * gd->gd_spinlocks_wr/gd->gd_curthead->td_critcount predisposition.
321 * Returns 0 on success, EAGAIN on failure.
324 _mtx_spinlock_try(mtx_t mtx)
326 globaldata_t gd = mycpu;
327 u_int lock;
328 u_int nlock;
329 int res = 0;
331 for (;;) {
332 lock = mtx->mtx_lock;
333 if (lock == 0) {
334 nlock = MTX_EXCLUSIVE | 1;
335 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
336 mtx->mtx_owner = gd->gd_curthread;
337 break;
339 } else if ((lock & MTX_EXCLUSIVE) &&
340 mtx->mtx_owner == gd->gd_curthread) {
341 KKASSERT((lock & MTX_MASK) != MTX_MASK);
342 nlock = lock + 1;
343 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
344 break;
345 } else {
346 --gd->gd_spinlocks_wr;
347 cpu_ccfence();
348 --gd->gd_curthread->td_critcount;
349 res = EAGAIN;
350 break;
352 cpu_pause();
353 ++mtx_collision_count;
355 return res;
358 #if 0
360 void
361 _mtx_spinlock_sh(mtx_t mtx)
363 u_int lock;
364 u_int nlock;
365 int bb = 1;
366 int bo;
368 for (;;) {
369 lock = mtx->mtx_lock;
370 if ((lock & MTX_EXCLUSIVE) == 0) {
371 KKASSERT((lock & MTX_MASK) != MTX_MASK);
372 nlock = lock + 1;
373 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
374 break;
375 } else {
376 /* MWAIT here */
377 if (bb < 1000)
378 ++bb;
379 cpu_pause();
380 for (bo = 0; bo < bb; ++bo)
382 ++mtx_contention_count;
384 cpu_pause();
385 ++mtx_collision_count;
389 #endif
392 _mtx_lock_ex_try(mtx_t mtx)
394 u_int lock;
395 u_int nlock;
396 int error = 0;
398 for (;;) {
399 lock = mtx->mtx_lock;
400 if (lock == 0) {
401 nlock = MTX_EXCLUSIVE | 1;
402 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
403 mtx->mtx_owner = curthread;
404 break;
406 } else if ((lock & MTX_EXCLUSIVE) &&
407 mtx->mtx_owner == curthread) {
408 KKASSERT((lock & MTX_MASK) != MTX_MASK);
409 nlock = lock + 1;
410 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
411 break;
412 } else {
413 error = EAGAIN;
414 break;
416 cpu_pause();
417 ++mtx_collision_count;
419 return (error);
423 _mtx_lock_sh_try(mtx_t mtx)
425 u_int lock;
426 u_int nlock;
427 int error = 0;
429 for (;;) {
430 lock = mtx->mtx_lock;
431 if ((lock & MTX_EXCLUSIVE) == 0) {
432 KKASSERT((lock & MTX_MASK) != MTX_MASK);
433 nlock = lock + 1;
434 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
435 break;
436 } else {
437 error = EAGAIN;
438 break;
440 cpu_pause();
441 ++mtx_collision_count;
443 return (error);
447 * If the lock is held exclusively it must be owned by the caller. If the
448 * lock is already a shared lock this operation is a NOP. A panic will
449 * occur if the lock is not held either shared or exclusive.
451 * The exclusive count is converted to a shared count.
453 void
454 _mtx_downgrade(mtx_t mtx)
456 u_int lock;
457 u_int nlock;
459 for (;;) {
460 lock = mtx->mtx_lock;
461 if ((lock & MTX_EXCLUSIVE) == 0) {
462 KKASSERT((lock & MTX_MASK) > 0);
463 break;
465 KKASSERT(mtx->mtx_owner == curthread);
466 nlock = lock & ~(MTX_EXCLUSIVE | MTX_SHWANTED);
467 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
468 if (lock & MTX_SHWANTED) {
469 wakeup(mtx);
470 ++mtx_wakeup_count;
472 break;
474 cpu_pause();
475 ++mtx_collision_count;
480 * Upgrade a shared lock to an exclusive lock. The upgrade will fail if
481 * the shared lock has a count other then 1. Optimize the most likely case
482 * but note that a single cmpset can fail due to WANTED races.
484 * If the lock is held exclusively it must be owned by the caller and
485 * this function will simply return without doing anything. A panic will
486 * occur if the lock is held exclusively by someone other then the caller.
488 * Returns 0 on success, EDEADLK on failure.
491 _mtx_upgrade_try(mtx_t mtx)
493 u_int lock;
494 u_int nlock;
495 int error = 0;
497 for (;;) {
498 lock = mtx->mtx_lock;
500 if ((lock & ~MTX_EXWANTED) == 1) {
501 nlock = lock | MTX_EXCLUSIVE;
502 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
503 mtx->mtx_owner = curthread;
504 break;
506 } else if (lock & MTX_EXCLUSIVE) {
507 KKASSERT(mtx->mtx_owner == curthread);
508 break;
509 } else {
510 error = EDEADLK;
511 break;
513 cpu_pause();
514 ++mtx_collision_count;
516 return (error);
520 * Unlock a lock. The caller must hold the lock either shared or exclusive.
522 * Any release which makes the lock available when others want an exclusive
523 * lock causes us to chain the owner to the next exclusive lock instead of
524 * releasing the lock.
526 void
527 _mtx_unlock(mtx_t mtx)
529 u_int lock;
530 u_int nlock;
532 for (;;) {
533 lock = mtx->mtx_lock;
534 nlock = lock & ~(MTX_SHWANTED | MTX_EXLINK);
536 if (nlock == 1) {
538 * Last release, shared lock, no exclusive waiters.
540 nlock = lock & MTX_EXLINK;
541 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
542 break;
543 } else if (nlock == (MTX_EXCLUSIVE | 1)) {
545 * Last release, exclusive lock, no exclusive waiters.
546 * Wake up any shared waiters.
548 mtx->mtx_owner = NULL;
549 nlock = lock & MTX_EXLINK;
550 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
551 if (lock & MTX_SHWANTED) {
552 wakeup(mtx);
553 ++mtx_wakeup_count;
555 break;
557 } else if (nlock == (MTX_EXWANTED | 1)) {
559 * Last release, shared lock, with exclusive
560 * waiters.
562 * Wait for EXLINK to clear, then acquire it.
563 * We could use the cmpset for this but polling
564 * is better on the cpu caches.
566 * Acquire an exclusive lock leaving the lockcount
567 * set to 1, and get EXLINK for access to mtx_link.
569 thread_t td;
571 if (lock & MTX_EXLINK) {
572 cpu_pause();
573 ++mtx_collision_count;
574 continue;
576 td = curthread;
577 /*lock &= ~MTX_EXLINK;*/
578 nlock |= MTX_EXLINK | MTX_EXCLUSIVE;
579 nlock |= (lock & MTX_SHWANTED);
580 ++td->td_critcount;
581 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
582 mtx_chain_link(mtx);
583 --td->td_critcount;
584 break;
586 --td->td_critcount;
587 } else if (nlock == (MTX_EXCLUSIVE | MTX_EXWANTED | 1)) {
589 * Last release, exclusive lock, with exclusive
590 * waiters.
592 * leave the exclusive lock intact and the lockcount
593 * set to 1, and get EXLINK for access to mtx_link.
595 thread_t td;
597 if (lock & MTX_EXLINK) {
598 cpu_pause();
599 ++mtx_collision_count;
600 continue;
602 td = curthread;
603 /*lock &= ~MTX_EXLINK;*/
604 nlock |= MTX_EXLINK;
605 nlock |= (lock & MTX_SHWANTED);
606 ++td->td_critcount;
607 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
608 mtx_chain_link(mtx);
609 --td->td_critcount;
610 break;
612 --td->td_critcount;
613 } else {
615 * Not the last release (shared or exclusive)
617 nlock = lock - 1;
618 KKASSERT((nlock & MTX_MASK) != MTX_MASK);
619 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
620 break;
622 cpu_pause();
623 ++mtx_collision_count;
628 * Chain mtx_chain_link. Called with the lock held exclusively with a
629 * single ref count, and also with MTX_EXLINK held.
631 static void
632 mtx_chain_link(mtx_t mtx)
634 mtx_link_t link;
635 u_int lock;
636 u_int nlock;
637 u_int clock; /* bits we own and want to clear */
640 * Chain the exclusive lock to the next link. The caller cleared
641 * SHWANTED so if there is no link we have to wake up any shared
642 * waiters.
644 clock = MTX_EXLINK;
645 if ((link = mtx->mtx_link) != NULL) {
646 KKASSERT(link->state == MTX_LINK_LINKED);
647 if (link->next == link) {
648 mtx->mtx_link = NULL;
649 clock |= MTX_EXWANTED;
650 } else {
651 mtx->mtx_link = link->next;
652 link->next->prev = link->prev;
653 link->prev->next = link->next;
655 link->state = MTX_LINK_ACQUIRED;
656 mtx->mtx_owner = link->owner;
657 } else {
659 * Chain was empty, release the exclusive lock's last count
660 * as well the bits shown.
662 clock |= MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1;
666 * We have to uset cmpset here to deal with MTX_SHWANTED. If
667 * we just clear the bits we can miss a wakeup or, worse,
668 * leave mtx_lock unlocked with MTX_SHWANTED still set.
670 for (;;) {
671 lock = mtx->mtx_lock;
672 nlock = lock & ~clock;
674 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
675 if (link) {
677 * Wakeup new exclusive holder. Leave
678 * SHWANTED intact.
680 wakeup(link);
681 } else if (lock & MTX_SHWANTED) {
683 * Signal any shared waiters (and we also
684 * clear SHWANTED).
686 mtx->mtx_owner = NULL;
687 wakeup(mtx);
688 ++mtx_wakeup_count;
690 break;
692 cpu_pause();
693 ++mtx_collision_count;
698 * Delete a link structure after tsleep has failed. This code is not
699 * in the critical path as most exclusive waits are chained.
701 static
702 void
703 mtx_delete_link(mtx_t mtx, mtx_link_t link)
705 thread_t td = curthread;
706 u_int lock;
707 u_int nlock;
710 * Acquire MTX_EXLINK.
712 * Do not use cmpxchg to wait for EXLINK to clear as this might
713 * result in too much cpu cache traffic.
715 ++td->td_critcount;
716 for (;;) {
717 lock = mtx->mtx_lock;
718 if (lock & MTX_EXLINK) {
719 cpu_pause();
720 ++mtx_collision_count;
721 continue;
723 /* lock &= ~MTX_EXLINK; */
724 nlock = lock | MTX_EXLINK;
725 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
726 break;
727 cpu_pause();
728 ++mtx_collision_count;
732 * Delete the link and release EXLINK.
734 if (link->state == MTX_LINK_LINKED) {
735 if (link->next == link) {
736 mtx->mtx_link = NULL;
737 } else {
738 mtx->mtx_link = link->next;
739 link->next->prev = link->prev;
740 link->prev->next = link->next;
742 link->state = MTX_LINK_IDLE;
744 atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
745 --td->td_critcount;
749 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
750 * return ENOLCK. This may be called at any time after the
751 * mtx_link is initialized, including both before and after the call
752 * to mtx_lock_ex_link().
754 void
755 mtx_abort_ex_link(mtx_t mtx, mtx_link_t link)
757 thread_t td = curthread;
758 u_int lock;
759 u_int nlock;
762 * Acquire MTX_EXLINK
764 ++td->td_critcount;
765 for (;;) {
766 lock = mtx->mtx_lock;
767 if (lock & MTX_EXLINK) {
768 cpu_pause();
769 ++mtx_collision_count;
770 continue;
772 /* lock &= ~MTX_EXLINK; */
773 nlock = lock | MTX_EXLINK;
774 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
775 break;
776 cpu_pause();
777 ++mtx_collision_count;
781 * Do the abort
783 switch(link->state) {
784 case MTX_LINK_IDLE:
786 * Link not started yet
788 link->state = MTX_LINK_ABORTED;
789 break;
790 case MTX_LINK_LINKED:
792 * de-link, mark aborted, and wakeup the thread.
794 if (link->next == link) {
795 mtx->mtx_link = NULL;
796 } else {
797 mtx->mtx_link = link->next;
798 link->next->prev = link->prev;
799 link->prev->next = link->next;
801 link->state = MTX_LINK_ABORTED;
802 wakeup(link);
803 break;
804 case MTX_LINK_ACQUIRED:
806 * Too late, the lock was acquired. Let it complete.
808 break;
809 default:
811 * link already aborted, do nothing.
813 break;
815 atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
816 --td->td_critcount;