From cff27badea2d316c1dd7581302d824e7a49fa9f3 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 5 Dec 2016 15:07:43 -0800 Subject: [PATCH] kernel - Spiff up locks a bit * Do a little optimization of _spin_lock_contested(). The critical path is able to avoid two atomic ops in the initialization portion of the contested path. * Optimize _spin_lock_shared_contested() to use atomic_fetchadd_long() to add a shared-lock count instead of atomic_cmpset_long(). Shared spinlocks are used heavily and this will prevent a lot of unnecessary spinning when many cpus are using the same lock at the same time. * Hold fdp->fd_spin across fdp->fd_cdir and fdp->fd_ncdir modifications. This completes other work which caches fdp->fd_ncdir and avoids having to obtain the spin-lock when the cache matches. Discussed-with: Mateusz Guzik (mjg_) --- sys/kern/kern_spinlock.c | 13 ++++++++----- sys/kern/lwkt_token.c | 7 +++---- sys/kern/vfs_syscalls.c | 8 ++++++-- sys/sys/spinlock2.h | 13 ++++++++----- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/sys/kern/kern_spinlock.c b/sys/kern/kern_spinlock.c index ce158ddf0d..c086c68b86 100644 --- a/sys/kern/kern_spinlock.c +++ b/sys/kern/kern_spinlock.c @@ -188,7 +188,7 @@ spin_trylock_contested(struct spinlock *spin) * as well (no difference). */ void -_spin_lock_contested(struct spinlock *spin, const char *ident) +_spin_lock_contested(struct spinlock *spin, const char *ident, int value) { struct indefinite_info info = { 0, 0, ident }; int i; @@ -196,8 +196,10 @@ _spin_lock_contested(struct spinlock *spin, const char *ident) /* * Handle degenerate case. */ - if (atomic_cmpset_int(&spin->counta, SPINLOCK_SHARED|0, 1)) - return; + if (value == SPINLOCK_SHARED) { + if (atomic_cmpset_int(&spin->counta, SPINLOCK_SHARED|0, 1)) + return; + } /* * Transfer our count to the high bits, then loop until we can @@ -208,7 +210,8 @@ _spin_lock_contested(struct spinlock *spin, const char *ident) * understands that this may occur. */ atomic_add_int(&spin->counta, SPINLOCK_EXCLWAIT - 1); - atomic_clear_int(&spin->counta, SPINLOCK_SHARED); + if (value & SPINLOCK_SHARED) + atomic_clear_int(&spin->counta, SPINLOCK_SHARED); #ifdef DEBUG_LOCKS_LATENCY long j; @@ -262,7 +265,7 @@ _spin_lock_contested(struct spinlock *spin, const char *ident) * The caller has not modified counta. */ void -_spin_lock_shared_contested(struct spinlock *spin, const char *ident) +_spin_lock_shared_contested(struct spinlock *spin, const char *ident, int value) { struct indefinite_info info = { 0, 0, ident }; int i; diff --git a/sys/kern/lwkt_token.c b/sys/kern/lwkt_token.c index 0e6fe832b8..84cdd83f0e 100644 --- a/sys/kern/lwkt_token.c +++ b/sys/kern/lwkt_token.c @@ -331,14 +331,13 @@ _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode) oref = tok->t_ref; /* can be NULL */ cpu_ccfence(); if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) { - /* XXX EXCLREQ should work */ /* - * It is possible to get the token shared. + * It may be possible to get the token shared. 
*/ - if (atomic_cmpset_long(&tok->t_count, count, - count + TOK_INCR)) { + if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) { return TRUE; } + atomic_fetchadd_long(&tok->t_count, -TOK_INCR); /* retry */ } else if ((count & TOK_EXCLUSIVE) && oref >= &td->td_toks_base && diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bbde8f42d3..e445ad7ff1 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1643,11 +1643,13 @@ sys_fchdir(struct fchdir_args *uap) cache_dropmount(mp); } if (error == 0) { + spin_lock(&fdp->fd_spin); ovp = fdp->fd_cdir; onch = fdp->fd_ncdir; - vn_unlock(vp); /* leave ref intact */ fdp->fd_cdir = vp; fdp->fd_ncdir = nch; + spin_unlock(&fdp->fd_spin); + vn_unlock(vp); /* leave ref intact */ cache_drop(&onch); vrele(ovp); } else { @@ -1682,11 +1684,13 @@ kern_chdir(struct nlookupdata *nd) error = checkvp_chdir(vp, td); vn_unlock(vp); if (error == 0) { + spin_lock(&fdp->fd_spin); ovp = fdp->fd_cdir; onch = fdp->fd_ncdir; - cache_unlock(&nd->nl_nch); /* leave reference intact */ fdp->fd_ncdir = nd->nl_nch; fdp->fd_cdir = vp; + spin_unlock(&fdp->fd_spin); + cache_unlock(&nd->nl_nch); /* leave reference intact */ cache_drop(&onch); vrele(ovp); cache_zero(&nd->nl_nch); diff --git a/sys/sys/spinlock2.h b/sys/sys/spinlock2.h index ae1b34dca7..de6e07dad3 100644 --- a/sys/sys/spinlock2.h +++ b/sys/sys/spinlock2.h @@ -54,8 +54,9 @@ extern struct spinlock pmap_spin; int spin_trylock_contested(struct spinlock *spin); -void _spin_lock_contested(struct spinlock *spin, const char *ident); -void _spin_lock_shared_contested(struct spinlock *spin, const char *ident); +void _spin_lock_contested(struct spinlock *spin, const char *ident, int count); +void _spin_lock_shared_contested(struct spinlock *spin, const char *ident, + int count); void _spin_pool_lock(void *chan, const char *ident); void _spin_pool_unlock(void *chan); @@ -111,11 +112,13 @@ spin_held(struct spinlock *spin) static __inline void _spin_lock_quick(globaldata_t gd, struct spinlock *spin, const char *ident) { + int count; + ++gd->gd_curthread->td_critcount; cpu_ccfence(); ++gd->gd_spinlocks; - if (atomic_fetchadd_int(&spin->counta, 1) != 0) - _spin_lock_contested(spin, ident); + if ((count = atomic_fetchadd_int(&spin->counta, 1)) != 0) + _spin_lock_contested(spin, ident, count + 1); #ifdef DEBUG_LOCKS int i; for (i = 0; i < SPINLOCK_DEBUG_ARRAY_SIZE; i++) { @@ -199,7 +202,7 @@ _spin_lock_shared_quick(globaldata_t gd, struct spinlock *spin, atomic_set_int(&spin->counta, SPINLOCK_SHARED); } else if ((counta & SPINLOCK_SHARED) == 0) { atomic_add_int(&spin->counta, -1); - _spin_lock_shared_contested(spin, ident); + _spin_lock_shared_contested(spin, ident, counta); } #ifdef DEBUG_LOCKS int i; -- 2.11.4.GIT
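
Editor's note: the core technique in both the lwkt_token.c and spinlock changes is the same — optimistically bump a shared count with a fetchadd, check the returned old value for an exclusive holder, and back the count out only on conflict, instead of looping on a compare-and-swap that many concurrent readers keep invalidating. The sketch below is a minimal userland illustration of that pattern, not code from this patch: C11 <stdatomic.h> stands in for the kernel's atomic_fetchadd_long()/atomic_fetchadd_int(), and the shtoken type, TOK_* values, and function names are hypothetical.

/*
 * Minimal sketch of the fetchadd-based shared-acquire pattern, assuming
 * a counter word whose low bit marks an exclusive holder and whose upper
 * bits count shared references (names and layout are illustrative only).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOK_EXCLUSIVE	0x00000001UL	/* low bit: held exclusively */
#define TOK_INCR	0x00000100UL	/* one shared reference */

struct shtoken {
	atomic_ulong	count;
};

/*
 * Optimistic shared acquire: add a shared reference first, then check
 * whether an exclusive holder was already present.  On conflict the
 * reference is backed out and the caller retries, rather than spinning
 * on a cmpset whose expected value other readers keep changing.
 */
static bool
shtoken_try_shared(struct shtoken *tok)
{
	unsigned long ocount;

	ocount = atomic_fetch_add(&tok->count, TOK_INCR);
	if ((ocount & TOK_EXCLUSIVE) == 0)
		return true;				/* got it shared */
	atomic_fetch_sub(&tok->count, TOK_INCR);	/* undo, retry later */
	return false;
}

static void
shtoken_rel_shared(struct shtoken *tok)
{
	atomic_fetch_sub(&tok->count, TOK_INCR);
}

int
main(void)
{
	struct shtoken tok = { .count = 0 };

	if (shtoken_try_shared(&tok)) {
		printf("shared acquire ok, count=%#lx\n",
		    atomic_load(&tok.count));
		shtoken_rel_shared(&tok);
	}
	return 0;
}

The win over the cmpset loop is that concurrent readers no longer fail each other's updates: every fetchadd succeeds, and only the rare reader that collides with an exclusive holder has to undo and retry.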