kernel - Fix excessive call stack depth on stuck interrupt
sys/kern/kern_mutex.c
/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */
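/*
 * Example (illustrative sketch, not authoritative): a simple synchronous
 * acquire/release using the backoff functions defined below.  Callers
 * normally go through the mtx_*() inlines in sys/mutex2.h; "mymtx" is an
 * assumed, already-initialized mtx_t.
 *
 *      _mtx_lock_ex_quick(&mymtx);     (exclusive, no PCATCH, no timeout)
 *      ...modify the protected structure...
 *      _mtx_unlock(&mymtx);
 *
 *      _mtx_lock_sh_quick(&mymtx);     (shared, can be held across blocking)
 *      ...read the protected structure...
 *      _mtx_unlock(&mymtx);
 */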
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>
static int64_t mtx_contention_count;
static int64_t mtx_collision_count;
static int64_t mtx_wakeup_count;

SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
            &mtx_contention_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
            &mtx_collision_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
            &mtx_wakeup_count, 0, "");
static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int lock;
        u_int nlock;
        int error;
        int isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }
                if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }
                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
                 * pending exclusive requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }

                /*
                 * Check for early abort.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_exlink == NULL) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_EXWANTED);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }

                /*
                 * Add our link to the exlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_EX;
                if (mtx->mtx_exlink) {
                        link->next = mtx->mtx_exlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_exlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If asynchronous lock request return without
                 * blocking, leave link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for lock
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}
int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_ex(mtx, &link, 0, 0));
}
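/*
 * Example (illustrative sketch): an asynchronous exclusive request using
 * the link interface above.  The callback signature mirrors the calls made
 * by mtx_chain_link_ex() and mtx_abort_link() below; the myobj_* names are
 * hypothetical.
 *
 *      static void
 *      myobj_mtx_callback(mtx_link_t *link, void *arg, int error)
 *      {
 *              (error is 0 when the lock was granted, ENOLCK on abort)
 *      }
 *
 *      mtx_link_init(&link);
 *      link.callback = myobj_mtx_callback;
 *      link.arg = myobj;
 *      error = _mtx_lock_ex_link(&mtx, &link, 0, 0);
 *      if (error == 0) {
 *              (acquired synchronously, the callback is NOT made and the
 *               link was not left linked)
 *      } else if (error == EINPROGRESS) {
 *              (queued, the callback will be made when the lock is granted
 *               or the request is aborted)
 *      }
 */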
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *       do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int lock;
        u_int nlock;
        int error;
        int isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                error = 0;
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }
                if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }
                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
                 * pending shared requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }
                /*
                 * Check for early abort.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_shlink == NULL) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }
                /*
                 * Add our link to the shlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_SH;
                if (mtx->mtx_shlink) {
                        link->next = mtx->mtx_shlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_shlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If asynchronous lock request return without
                 * blocking, leave link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for lock
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}
int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_sh(mtx, &link, 0, 0));
}
/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;
        int bb = 1;
        int bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                        ++mtx_contention_count;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}
/*
 * Attempt to acquire a spinlock, if we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
        globaldata_t gd = mycpu;
        u_int lock;
        u_int nlock;
        int res = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = gd->gd_curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == gd->gd_curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        --gd->gd_spinlocks;
                        cpu_ccfence();
                        --gd->gd_curthread->td_critcount;
                        res = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return res;
}
#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;
        int bb = 1;
        int bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                        ++mtx_contention_count;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}

#endif
int
_mtx_lock_ex_try(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;
        int error;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                error = 0;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}
int
_mtx_lock_sh_try(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;
        int error = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                /*
                 * NOP if already shared.
                 */
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) > 0);
                        break;
                }

                /*
                 * Transfer count to shared.  Any additional pending shared
                 * waiters must be woken up.
                 */
                if (lock & MTX_SHWANTED) {
                        if (mtx_chain_link_sh(mtx, lock, 1))
                                break;
                        /* retry */
                } else {
                        nlock = lock & ~MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                        /* retry */
                }
                cpu_pause();
                ++mtx_collision_count;
        }
}
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;
        int error = 0;

        for (;;) {
                lock = mtx->mtx_lock;

                if ((lock & ~MTX_EXWANTED) == 1) {
                        nlock = lock | MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if (lock & MTX_EXCLUSIVE) {
                        KKASSERT(mtx->mtx_owner == curthread);
                        break;
                } else {
                        error = EDEADLK;
                        break;
                }
                cpu_pause();
                ++mtx_collision_count;
        }
        return (error);
}
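/*
 * Example (illustrative sketch): the upgrade-or-relock pattern implied by
 * the EDEADLK semantics above.
 *
 *      _mtx_lock_sh_quick(&mtx);
 *      ...
 *      if (_mtx_upgrade_try(&mtx) == EDEADLK) {
 *              (another shared holder exists; drop the shared lock,
 *               reacquire exclusively, then revalidate any state examined
 *               while only the shared lock was held)
 *              _mtx_unlock(&mtx);
 *              _mtx_lock_ex_quick(&mtx);
 *      }
 *      ...
 *      _mtx_unlock(&mtx);
 */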
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
        u_int lock;
        u_int nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                switch(lock) {
                case MTX_EXCLUSIVE | 1:
                        /*
                         * Last release, exclusive lock.
                         * No exclusive or shared requests pending.
                         */
                        mtx->mtx_owner = NULL;
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
                case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         * Exclusive requests pending.
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         *
                         * Shared requests are pending.  Transfer our count (1)
                         * to the first shared request, wakeup all shared reqs.
                         */
                        if (mtx_chain_link_sh(mtx, lock, 0))
                                goto done;
                        break;
                case 1:
                        /*
                         * Last release, shared lock.
                         * No exclusive or shared requests pending.
                         */
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXWANTED | 1:
                case MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         *
                         * Exclusive requests are pending.  Transfer our
                         * count (1) to the next exclusive request.
                         *
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         * Shared requests pending.
                         */
                        if (mtx_chain_link_sh(mtx, lock, 0))
                                goto done;
                        break;
                default:
                        /*
                         * We have to loop if this is the last release but
                         * someone is fiddling with LINKSPIN.
                         */
                        if ((lock & MTX_MASK) == 1) {
                                KKASSERT(lock & MTX_LINKSPIN);
                                break;
                        }

                        /*
                         * Not the last release (shared or exclusive)
                         */
                        nlock = lock - 1;
                        KKASSERT((nlock & MTX_MASK) != MTX_MASK);
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                }
                /* loop try again */
                cpu_pause();
                ++mtx_collision_count;
        }
done:
        ;
}
/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *       acquiring LINKSPIN as all other cases will also need to acquire
 *       LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                link = mtx->mtx_exlink;
                KKASSERT(link != NULL);
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock = MTX_LINKSPIN | MTX_EXWANTED;    /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                        nlock = MTX_LINKSPIN;                   /* to clear */
                }
                KKASSERT(link->state == MTX_LINK_LINKED_EX);
                mtx->mtx_owner = link->owner;
                cpu_sfence();

                /*
                 * WARNING! The callback can only be safely
                 *          made with LINKSPIN still held
                 *          and in a critical section.
                 *
                 * WARNING! The link can go away after the
                 *          state is set, or after the
                 *          callback.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, 0);
                } else {
                        link->state = MTX_LINK_ACQUIRED;
                        wakeup(link);
                }
                atomic_clear_int(&mtx->mtx_lock, nlock);
                --td->td_critcount;
                ++mtx_wakeup_count;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches.
 *
 * If addcount is 0, the count for the first shared lock in the chain is
 * assumed to have already been accounted for.
 *
 * If addcount is 1, the count for the first shared lock in the chain has
 * not yet been accounted for.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN;
        nlock &= ~MTX_EXCLUSIVE;
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                KKASSERT(mtx->mtx_shlink != NULL);
                for (;;) {
                        link = mtx->mtx_shlink;
                        atomic_add_int(&mtx->mtx_lock, addcount);
                        KKASSERT(link->state == MTX_LINK_LINKED_SH);
                        if (link->next == link) {
                                mtx->mtx_shlink = NULL;
                                cpu_sfence();

                                /*
                                 * WARNING! The callback can only be safely
                                 *          made with LINKSPIN still held
                                 *          and in a critical section.
                                 *
                                 * WARNING! The link can go away after the
                                 *          state is set, or after the
                                 *          callback.
                                 */
                                if (link->callback) {
                                        link->state = MTX_LINK_CALLEDBACK;
                                        link->callback(link, link->arg, 0);
                                } else {
                                        link->state = MTX_LINK_ACQUIRED;
                                        wakeup(link);
                                }
                                ++mtx_wakeup_count;
                                break;
                        }
                        mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                        cpu_sfence();
                        link->state = MTX_LINK_ACQUIRED;
                        /* link can go away */
                        wakeup(link);
                        ++mtx_wakeup_count;
                        addcount = 1;
                }
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                --td->td_critcount;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}
/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int lock;
        u_int nlock;

        /*
         * Acquire MTX_LINKSPIN.
         *
         * Do not use cmpxchg to wait for LINKSPIN to clear as this might
         * result in too much cpu cache traffic.
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
                ++mtx_collision_count;
        }

        /*
         * Delete the link and release LINKSPIN.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_LINKED_EX:
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock |= MTX_EXWANTED;  /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        case MTX_LINK_LINKED_SH:
                if (link->next == link) {
                        mtx->mtx_shlink = NULL;
                        nlock |= MTX_SHWANTED;  /* to clear */
                } else {
                        mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        default:
                /* no change */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}
/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        int error;

        /*
         * Sleep.  Handle false wakeups, interruptions, etc.
         * The link may also have been aborted.
         */
        error = 0;
        while (link->state & MTX_LINK_LINKED) {
                tsleep_interlock(link, 0);
                cpu_lfence();
                if (link->state & MTX_LINK_LINKED) {
                        ++mtx_contention_count;
                        if (link->state & MTX_LINK_LINKED_SH)
                                mycpu->gd_cnt.v_lock_name[0] = 'S';
                        else
                                mycpu->gd_cnt.v_lock_name[0] = 'X';
                        strncpy(mycpu->gd_cnt.v_lock_name + 1,
                                mtx->mtx_ident,
                                sizeof(mycpu->gd_cnt.v_lock_name) - 2);
                        ++mycpu->gd_cnt.v_lock_colls;

                        error = tsleep(link, flags | PINTERLOCKED,
                                       mtx->mtx_ident, to);
                        if (error)
                                break;
                }
        }

        /*
         * We are done, make sure the link structure is unlinked.
         * It may still be on the list due to e.g. EINTR or
         * EWOULDBLOCK.
         *
         * It is possible for the tsleep to race an ABORT and cause
         * error to be 0.
         *
         * The tsleep() can be woken up for numerous reasons and error
         * might be zero in situations where we intend to return an error.
         *
         * (This is the synchronous case so state cannot be CALLEDBACK)
         */
        switch(link->state) {
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                error = 0;
                break;
        case MTX_LINK_ABORTED:
                error = ENOLCK;
                break;
        case MTX_LINK_LINKED_EX:
        case MTX_LINK_LINKED_SH:
                mtx_delete_link(mtx, link);
                /* fall through */
        default:
                if (error == 0)
                        error = EWOULDBLOCK;
                break;
        }

        /*
         * Clear state on status returned.
         */
        link->state = MTX_LINK_IDLE;

        return error;
}
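/*
 * Example (illustrative sketch): aborting a blocked lock request from
 * another context.  The waiting context blocks inside _mtx_lock_ex_link()
 * (which sleeps in mtx_wait_link() above); a second context that knows the
 * link structure can force it to return ENOLCK.
 *
 *      (waiting context)
 *      mtx_link_init(&link);
 *      error = _mtx_lock_ex_link(&mtx, &link, 0, 0);
 *      (error is 0 if the lock was acquired, ENOLCK if it was aborted)
 *
 *      (aborting context)
 *      mtx_abort_link(&mtx, &link);
 */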
/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int lock;
        u_int nlock;

        /*
         * Acquire MTX_LINKSPIN
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        ++mtx_collision_count;
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
                ++mtx_collision_count;
        }

        /*
         * Do the abort.
         *
         * WARNING! Link structure can disappear once link->state is set.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_IDLE:
                /*
                 * Link not started yet
                 */
                link->state = MTX_LINK_ABORTED;
                break;
        case MTX_LINK_LINKED_EX:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_exlink == link) {
                                mtx->mtx_exlink = NULL;
                                nlock |= MTX_EXWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_exlink == link)
                                mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                ++mtx_wakeup_count;
                break;
        case MTX_LINK_LINKED_SH:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_shlink == link) {
                                mtx->mtx_shlink = NULL;
                                nlock |= MTX_SHWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_shlink == link)
                                mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                ++mtx_wakeup_count;
                break;
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                /*
                 * Too late, the lock was acquired.  Let it complete.
                 */
                break;
        default:
                /*
                 * link already aborted, do nothing.
                 */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}