/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>
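
#if 0
/*
 * Illustrative sketch only (not part of the original file): typical
 * synchronous use of the API implemented below.  mtx_init() is assumed
 * to be the sys/mutex2.h initializer taking the mutex and an ident
 * string (the ident is what shows up in collision accounting);
 * _mtx_lock_ex_quick(), _mtx_lock_sh_quick() and _mtx_unlock() are the
 * leaf functions defined in this file.
 */
static mtx_t example_mtx;

static void
example_sync_usage(void)
{
        mtx_init(&example_mtx, "exmtx");        /* assumed wrapper */

        _mtx_lock_ex_quick(&example_mtx);       /* exclusive, may block */
        /* ... modify the protected structure ... */
        _mtx_unlock(&example_mtx);

        _mtx_lock_sh_quick(&example_mtx);       /* shared, may block */
        /* ... read the protected structure ... */
        _mtx_unlock(&example_mtx);
}
#endif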
static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int   lock;
        u_int   nlock;
        int     error;
        int     isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                cpu_sfence();
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }
                if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                cpu_sfence();
                                link->state = MTX_LINK_ACQUIRED;
                                error = 0;
                                break;
                        }
                        continue;
                }

                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
                 * pending exclusive requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }

                /*
                 * Check for early abort.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_exlink == NULL) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_EXWANTED);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }

                /*
                 * Add our link to the exlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_EX;
                if (mtx->mtx_exlink) {
                        link->next = mtx->mtx_exlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_exlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If asynchronous lock request return without
                 * blocking, leave link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for lock
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}
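
#if 0
/*
 * Illustrative sketch only (not part of the original file): the
 * asynchronous path described above.  A 0 return means the lock was
 * acquired synchronously and no callback will be issued; EINPROGRESS
 * means the link was queued and the callback fires later (error == 0
 * on acquisition, ENOLCK on abort).  The callback signature and the
 * direct assignment of link->callback/link->arg are inferred from how
 * this file invokes them; the official async link setup lives in the
 * mutex headers.
 */
static void
example_acquired_cb(mtx_link_t *link, void *arg, int error)
{
        if (error == 0) {
                /* lock granted asynchronously, we now own it exclusively */
        } else {
                /* ENOLCK: the request was aborted via mtx_abort_link() */
        }
}

static int
example_async_lock(mtx_t *mtx, mtx_link_t *link)
{
        int error;

        mtx_link_init(link);
        link->callback = example_acquired_cb;   /* illustration only */
        link->arg = NULL;

        error = _mtx_lock_ex_link(mtx, link, 0, 0);
        if (error == 0) {
                /* fast path: we already own the lock, no callback occurs */
        } else if (error == EINPROGRESS) {
                /* queued; completion is reported through the callback */
        }
        return (error);
}
#endif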
int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_ex(mtx, &link, 0, 0));
}
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *       do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        thread_t td;
        u_int   lock;
        u_int   nlock;
        int     error;
        int     isasync;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if (lock == 0) {
                        nlock = 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                error = 0;
                                cpu_sfence();
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }
                if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                cpu_sfence();
                                link->state = MTX_LINK_ACQUIRED;
                                break;
                        }
                        continue;
                }

                /*
                 * We need MTX_LINKSPIN to manipulate exlink or
                 * shlink.
                 *
                 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
                 * pending shared requests.  It cannot be set as a separate
                 * operation prior to acquiring MTX_LINKSPIN.
                 *
                 * To avoid unnecessary cpu cache traffic we poll
                 * for collisions.  It is also possible that EXWANTED
                 * state failing the above test was spurious, so all the
                 * tests must be repeated if we cannot obtain LINKSPIN
                 * with the prior state tests intact (i.e. don't reload
                 * the (lock) variable here, for heaven's sake!).
                 */
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        continue;
                }
                td = curthread;
                nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
                ++td->td_critcount;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
                        --td->td_critcount;
                        continue;
                }

                /*
                 * Check for early abort.  Other shared lock requestors
                 * could have sneaked in before we set LINKSPIN so make
                 * sure we undo the state properly.
                 */
                if (link->state == MTX_LINK_ABORTED) {
                        if (mtx->mtx_shlink) {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN);
                        } else {
                                atomic_clear_int(&mtx->mtx_lock,
                                                 MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                        }
                        --td->td_critcount;
                        link->state = MTX_LINK_IDLE;
                        error = ENOLCK;
                        break;
                }

                /*
                 * Add our link to the shlink list and release LINKSPIN.
                 */
                link->owner = td;
                link->state = MTX_LINK_LINKED_SH;
                if (mtx->mtx_shlink) {
                        link->next = mtx->mtx_shlink;
                        link->prev = link->next->prev;
                        link->next->prev = link;
                        link->prev->next = link;
                } else {
                        link->next = link;
                        link->prev = link;
                        mtx->mtx_shlink = link;
                }
                isasync = (link->callback != NULL);
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
                --td->td_critcount;

                /*
                 * If asynchronous lock request return without
                 * blocking, leave link structure linked.
                 */
                if (isasync) {
                        error = EINPROGRESS;
                        break;
                }

                /*
                 * Wait for lock
                 */
                error = mtx_wait_link(mtx, link, flags, to);
                break;
        }
        return (error);
}
int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
        mtx_link_t link;

        mtx_link_init(&link);
        return(__mtx_lock_sh(mtx, &link, 0, 0));
}
/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     bb = 1;
        int     bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                }
                cpu_pause();
        }
}
/*
 * Attempt to acquire a spinlock, if we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
        globaldata_t gd = mycpu;
        u_int   lock;
        u_int   nlock;
        int     res = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = gd->gd_curthread;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == gd->gd_curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        --gd->gd_spinlocks;
                        cpu_ccfence();
                        --gd->gd_curthread->td_critcount;
                        res = EAGAIN;
                        break;
                }
                cpu_pause();
        }
        return res;
}
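
#if 0
/*
 * Illustrative sketch only (not part of the original file): the shape
 * of the caller-side predisposition that _mtx_spinlock_try() undoes on
 * failure.  The real wrapper is presumed to live in sys/mutex2.h; this
 * sketch only shows the contract implied by the comment above: the
 * critical section and spinlock counts are bumped before the attempt,
 * and the leaf function decrements both itself when it returns EAGAIN.
 */
static __inline int
example_spinlock_try(mtx_t *mtx)
{
        globaldata_t gd = mycpu;

        ++gd->gd_curthread->td_critcount;       /* enter critical section */
        cpu_ccfence();
        ++gd->gd_spinlocks;                     /* account the spinlock */

        return (_mtx_spinlock_try(mtx));        /* undoes both on EAGAIN */
}
#endif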
#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     bb = 1;
        int     bo;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        /* MWAIT here */
                        if (bb < 1000)
                                ++bb;
                        cpu_pause();
                        for (bo = 0; bo < bb; ++bo)
                                ;
                }
                cpu_pause();
        }
}

#endif
int
_mtx_lock_ex_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error;

        for (;;) {
                lock = mtx->mtx_lock;
                if (lock == 0) {
                        nlock = MTX_EXCLUSIVE | 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
                                mtx->mtx_owner = curthread;
                                error = 0;
                                break;
                        }
                } else if ((lock & MTX_EXCLUSIVE) &&
                           mtx->mtx_owner == curthread) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                error = 0;
                                break;
                        }
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
        }
        return (error);
}
int
_mtx_lock_sh_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) != MTX_MASK);
                        nlock = lock + 1;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                } else {
                        error = EAGAIN;
                        break;
                }
                cpu_pause();
        }
        return (error);
}
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                /*
                 * NOP if already shared.
                 */
                if ((lock & MTX_EXCLUSIVE) == 0) {
                        KKASSERT((lock & MTX_MASK) > 0);
                        break;
                }

                /*
                 * Transfer count to shared.  Any additional pending shared
                 * waiters must be woken up.
                 */
                if (lock & MTX_SHWANTED) {
                        if (mtx_chain_link_sh(mtx, lock))
                                break;
                        /* retry */
                } else {
                        nlock = lock & ~MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                break;
                        /* retry */
                }
                cpu_pause();
        }
}
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;
        int     error = 0;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                if ((lock & ~MTX_EXWANTED) == 1) {
                        nlock = lock | MTX_EXCLUSIVE;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
                                mtx->mtx_owner = curthread;
                                break;
                        }
                } else if (lock & MTX_EXCLUSIVE) {
                        KKASSERT(mtx->mtx_owner == curthread);
                        break;
                } else {
                        error = EDEADLK;
                        break;
                }
                cpu_pause();
        }
        return (error);
}
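
#if 0
/*
 * Illustrative sketch only (not part of the original file): typical
 * upgrade/downgrade usage of the two functions above.  A failed upgrade
 * (EDEADLK) means other shared holders exist; a common recovery is to
 * drop the shared lock, reacquire exclusively, and revalidate state.
 */
static void
example_upgrade_downgrade(mtx_t *mtx)
{
        _mtx_lock_sh_quick(mtx);
        if (_mtx_upgrade_try(mtx) == EDEADLK) {
                _mtx_unlock(mtx);
                _mtx_lock_ex_quick(mtx);
                /* state may have changed while the lock was dropped */
        }
        /* ... exclusive work ... */
        _mtx_downgrade(mtx);            /* continue with shared access */
        /* ... shared work ... */
        _mtx_unlock(mtx);
}
#endif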
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
        u_int   lock;
        u_int   nlock;

        for (;;) {
                lock = mtx->mtx_lock;
                cpu_ccfence();

                switch(lock) {
                case MTX_EXCLUSIVE | 1:
                        /*
                         * Last release, exclusive lock.
                         * No exclusive or shared requests pending.
                         */
                        mtx->mtx_owner = NULL;
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
                case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         * Exclusive requests pending.
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
                        /*
                         * Last release, exclusive lock.
                         *
                         * Shared requests are pending.  Transfer our count (1)
                         * to the first shared request, wakeup all shared reqs.
                         */
                        if (mtx_chain_link_sh(mtx, lock))
                                goto done;
                        break;
                case 1:
                        /*
                         * Last release, shared lock.
                         * No exclusive or shared requests pending.
                         */
                        nlock = 0;
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                case MTX_EXWANTED | 1:
                case MTX_EXWANTED | MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         *
                         * Exclusive requests are pending.  Upgrade this
                         * final shared lock to exclusive and transfer our
                         * count (1) to the next exclusive request.
                         *
                         * Exclusive requests have priority over shared reqs.
                         */
                        if (mtx_chain_link_ex(mtx, lock))
                                goto done;
                        break;
                case MTX_SHWANTED | 1:
                        /*
                         * Last release, shared lock.
                         * Shared requests pending.
                         */
                        if (mtx_chain_link_sh(mtx, lock))
                                goto done;
                        break;
                default:
                        /*
                         * We have to loop if this is the last release but
                         * someone is fiddling with LINKSPIN.
                         */
                        if ((lock & MTX_MASK) == 1) {
                                KKASSERT(lock & MTX_LINKSPIN);
                                break;
                        }

                        /*
                         * Not the last release (shared or exclusive)
                         */
                        nlock = lock - 1;
                        KKASSERT((nlock & MTX_MASK) != MTX_MASK);
                        if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                                goto done;
                        break;
                }
                /* loop try again */
                cpu_pause();
        }
done:
        ;
}
/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *       acquiring LINKSPIN as all other cases will also need to acquire
 *       LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int   nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;   /* upgrade if necc */
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                link = mtx->mtx_exlink;
                KKASSERT(link != NULL);
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock = MTX_LINKSPIN | MTX_EXWANTED;    /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                        nlock = MTX_LINKSPIN;                   /* to clear */
                }
                KKASSERT(link->state == MTX_LINK_LINKED_EX);
                mtx->mtx_owner = link->owner;
                cpu_sfence();

                /*
                 * WARNING! The callback can only be safely
                 *          made with LINKSPIN still held
                 *          and in a critical section.
                 *
                 * WARNING! The link can go away after the
                 *          state is set, or after the
                 *          callback.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, 0);
                } else {
                        link->state = MTX_LINK_ACQUIRED;
                        wakeup(link);
                }
                atomic_clear_int(&mtx->mtx_lock, nlock);
                --td->td_critcount;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *            consume.  We will apply this to the first link, but must add
 *            additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
        thread_t td = curthread;
        mtx_link_t *link;
        u_int   addcount;
        u_int   nlock;

        olock &= ~MTX_LINKSPIN;
        nlock = olock | MTX_LINKSPIN;
        nlock &= ~MTX_EXCLUSIVE;
        ++td->td_critcount;
        if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
                /*
                 * It should not be possible for SHWANTED to be set without
                 * any links pending.
                 */
                KKASSERT(mtx->mtx_shlink != NULL);

                /*
                 * We have to process the count for all shared locks before
                 * we process any of the links.  Count the additional shared
                 * locks beyond the first link (which is already accounted
                 * for) and associate the full count with the lock
                 * immediately.
                 */
                addcount = 0;
                for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
                     link = link->next) {
                        ++addcount;
                }
                if (addcount > 0)
                        atomic_add_int(&mtx->mtx_lock, addcount);

                /*
                 * We can wakeup all waiting shared locks.
                 */
                while ((link = mtx->mtx_shlink) != NULL) {
                        KKASSERT(link->state == MTX_LINK_LINKED_SH);
                        if (link->next == link) {
                                mtx->mtx_shlink = NULL;
                        } else {
                                mtx->mtx_shlink = link->next;
                                link->next->prev = link->prev;
                                link->prev->next = link->next;
                        }
                        link->next = NULL;
                        link->prev = NULL;
                        cpu_sfence();
                        if (link->callback) {
                                link->state = MTX_LINK_CALLEDBACK;
                                link->callback(link, link->arg, 0);
                        } else {
                                cpu_sfence();
                                link->state = MTX_LINK_ACQUIRED;
                                wakeup(link);
                        }
                }
                atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
                                                 MTX_SHWANTED);
                --td->td_critcount;
                return 1;
        }
        /* retry */
        --td->td_critcount;
        return 0;
}
/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int   lock;
        u_int   nlock;

        /*
         * Acquire MTX_LINKSPIN.
         *
         * Do not use cmpxchg to wait for LINKSPIN to clear as this might
         * result in too much cpu cache traffic.
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
        }

        /*
         * Delete the link and release LINKSPIN.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_LINKED_EX:
                if (link->next == link) {
                        mtx->mtx_exlink = NULL;
                        nlock |= MTX_EXWANTED;  /* to clear */
                } else {
                        mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        case MTX_LINK_LINKED_SH:
                if (link->next == link) {
                        mtx->mtx_shlink = NULL;
                        nlock |= MTX_SHWANTED;  /* to clear */
                } else {
                        mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }
                break;
        default:
                /* no change */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}
/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
        int error;

        /*
         * Sleep.  Handle false wakeups, interruptions, etc.
         * The link may also have been aborted.  The LINKED
         * bit was set by this cpu so we can test it without
         * fences.
         */
        error = 0;
        while (link->state & MTX_LINK_LINKED) {
                tsleep_interlock(link, 0);
                cpu_lfence();
                if (link->state & MTX_LINK_LINKED) {
                        if (link->state & MTX_LINK_LINKED_SH)
                                mycpu->gd_cnt.v_lock_name[0] = 'S';
                        else
                                mycpu->gd_cnt.v_lock_name[0] = 'X';
                        strncpy(mycpu->gd_cnt.v_lock_name + 1,
                                mtx->mtx_ident,
                                sizeof(mycpu->gd_cnt.v_lock_name) - 2);
                        ++mycpu->gd_cnt.v_lock_colls;

                        error = tsleep(link, flags | PINTERLOCKED,
                                       mtx->mtx_ident, to);
                        if (error)
                                break;
                }
        }

        /*
         * We need at least a lfence (load fence) to ensure our cpu does not
         * reorder loads (of data outside the lock structure) prior to the
         * remote cpu's release, since the above test may have run without
         * any atomic interactions.
         *
         * If we do not do this then state updated by the other cpu before
         * releasing its lock may not be read cleanly by our cpu when this
         * function returns.  Even though the other cpu ordered its stores,
         * our loads can still be out of order.
         */
        cpu_mfence();

        /*
         * We are done, make sure the link structure is unlinked.
         * It may still be on the list due to e.g. EINTR or
         * EWOULDBLOCK.
         *
         * It is possible for the tsleep to race an ABORT and cause
         * error to be 0.
         *
         * The tsleep() can be woken up for numerous reasons and error
         * might be zero in situations where we intend to return an error.
         *
         * (This is the synchronous case so state cannot be CALLEDBACK)
         */
        switch(link->state) {
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                error = 0;
                break;
        case MTX_LINK_ABORTED:
                error = ENOLCK;
                break;
        case MTX_LINK_LINKED_EX:
        case MTX_LINK_LINKED_SH:
                mtx_delete_link(mtx, link);
                /* fall through */
        default:
                if (error == 0)
                        error = EWOULDBLOCK;
                break;
        }

        /*
         * Clear state on status returned.
         */
        link->state = MTX_LINK_IDLE;

        return error;
}
/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
        thread_t td = curthread;
        u_int   lock;
        u_int   nlock;

        /*
         * Acquire MTX_LINKSPIN
         */
        ++td->td_critcount;
        for (;;) {
                lock = mtx->mtx_lock;
                if (lock & MTX_LINKSPIN) {
                        cpu_pause();
                        continue;
                }
                nlock = lock | MTX_LINKSPIN;
                if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
                        break;
                cpu_pause();
        }

        /*
         * Do the abort.
         *
         * WARNING! Link structure can disappear once link->state is set.
         */
        nlock = MTX_LINKSPIN;   /* to clear */

        switch(link->state) {
        case MTX_LINK_IDLE:
                /*
                 * Link not started yet
                 */
                link->state = MTX_LINK_ABORTED;
                break;
        case MTX_LINK_LINKED_EX:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_exlink == link) {
                                mtx->mtx_exlink = NULL;
                                nlock |= MTX_EXWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_exlink == link)
                                mtx->mtx_exlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                break;
        case MTX_LINK_LINKED_SH:
                /*
                 * de-link, mark aborted, and potentially wakeup the thread
                 * or issue the callback.
                 */
                if (link->next == link) {
                        if (mtx->mtx_shlink == link) {
                                mtx->mtx_shlink = NULL;
                                nlock |= MTX_SHWANTED;  /* to clear */
                        }
                } else {
                        if (mtx->mtx_shlink == link)
                                mtx->mtx_shlink = link->next;
                        link->next->prev = link->prev;
                        link->prev->next = link->next;
                }

                /*
                 * When aborting the async callback is still made.  We must
                 * not set the link status to ABORTED in the callback case
                 * since there is nothing else to clear its status if the
                 * link is reused.
                 */
                if (link->callback) {
                        link->state = MTX_LINK_CALLEDBACK;
                        link->callback(link, link->arg, ENOLCK);
                } else {
                        link->state = MTX_LINK_ABORTED;
                        wakeup(link);
                }
                break;
        case MTX_LINK_ACQUIRED:
        case MTX_LINK_CALLEDBACK:
                /*
                 * Too late, the lock was acquired.  Let it complete.
                 */
                break;
        default:
                /*
                 * link already aborted, do nothing.
                 */
                break;
        }
        atomic_clear_int(&mtx->mtx_lock, nlock);
        --td->td_critcount;
}
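
#if 0
/*
 * Illustrative sketch only (not part of the original file): aborting a
 * queued lock request.  One context blocks in _mtx_lock_ex_link();
 * another context (e.g. a teardown path) aborts the link, and the
 * blocked caller returns ENOLCK from its lock attempt, per the
 * semantics documented above.
 */
static void
example_blocked_locker(mtx_t *mtx, mtx_link_t *link)
{
        int error;

        mtx_link_init(link);
        error = _mtx_lock_ex_link(mtx, link, 0, 0);     /* may block */
        if (error == ENOLCK) {
                /* request was aborted, we do not own the lock */
                return;
        }
        /* ... owns the lock exclusively ... */
        _mtx_unlock(mtx);
}

static void
example_teardown(mtx_t *mtx, mtx_link_t *link)
{
        /* cause the pending (or next) lock attempt on 'link' to fail */
        mtx_abort_link(mtx, link);
}
#endif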