From 3536c341ffda90bfdcc8310ef91231f18c81db52 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 4 Dec 2016 09:21:45 -0800 Subject: [PATCH] kernel - Overhaul namecache operations to reduce SMP contention * Overhaul the namecache code to remove a significant amount of cacheline ping-ponging from the namecache paths. This primarily affects multi-socket systems but also improves multi-core single-socket systems. Cacheline ping-ponging in the critical path can constrict a multi-core system to roughly ~1-2M operations per second running through that path. For example, even if looking up different paths or stating different files, even something as simple as a non-atomic ++global_counter seriously derates performance when it is being executed on all cores at once. In the simple non-conflicting single-component stat() case, this improves performance from ~2.5M/second to ~25M/second on a 4-socket 48-core opteron and has a similar improvement on a 2-socket 32-thread xeon, as well as significantly improves namecache perf on single-socket multi-core systems. * Remove the vfs.cache.numcalls and vfs.cache.numchecks debugging counters. These global counters caused significant cache ping-ponging and were only being used for debugging. * Implement a poor-man's referenced-structure pcpu cache for struct mount and struct namecache. This allows atomic ops on the ref-count for these structures to be avoided in certain critical path cases. For now limit to ncdir and nrdir (nrdir particularly, which is usually the same across nearly all processes in the system). Eventually we will want to expand this cache to handle more cases. Because we are holding refs persistently, add a bit of infrastructure to clear the cache as necessary (e.g. when doing an unmount, for example). * Shift the 'cachedvnodes' global to a per-cpu accumulator, then roll up the counter back to the global approximately once per second. 
The code critical paths adjust only the per-cpu accumulator, removing another global cache ping-pong from nearly all vnode and nlookup paths. * The nlookup structure now 'Borrows' the ucred reference from td->td_ucred instead of crhold()ing it, removing another global ref/unref from all nlookup paths. * We have a large hash table of spinlocks for nchash, add a little pad from 24 to 32 bytes. It's ok that two spin locks share the same cache line (it's a huge table), adding the pad cleans up cacheline-crossing cases. * Add a bit of pad to put mount->mnt_refs on its own cache-line versus prior fields which are accessed shared. But don't bother isolating it completely. --- sys/kern/vfs_cache.c | 216 ++++++++++++++++++++++++++++++++++++++---------- sys/kern/vfs_lock.c | 36 ++++++-- sys/kern/vfs_mount.c | 12 ++- sys/kern/vfs_nlookup.c | 20 +++-- sys/kern/vfs_syscalls.c | 8 ++ sys/sys/globaldata.h | 3 +- sys/sys/mount.h | 1 + sys/sys/namecache.h | 2 + sys/sys/nchstats.h | 4 +- sys/sys/nlookup.h | 3 +- sys/sys/vnode.h | 2 +- 11 files changed, 240 insertions(+), 67 deletions(-) diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 46f5680da5..0354a17318 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -128,9 +128,14 @@ MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); LIST_HEAD(nchash_list, namecache); +/* + * Don't cachealign, but at least pad to 32 bytes so entries + * don't cross a cache line. 
+ */ struct nchash_head { - struct nchash_list list; - struct spinlock spin; + struct nchash_list list; /* 16 bytes */ + struct spinlock spin; /* 8 bytes */ + long pad01; /* 8 bytes */ }; struct ncmount_cache { @@ -209,6 +214,7 @@ static long ncmount_cache_overwrite; SYSCTL_LONG(_debug, OID_AUTO, ncmount_cache_overwrite, CTLFLAG_RW, &ncmount_cache_overwrite, 0, "mpcache entry overwrites"); +static __inline void _cache_drop(struct namecache *ncp); static int cache_resolve_mp(struct mount *mp); static struct vnode *cache_dvpref(struct namecache *ncp); static void _cache_lock(struct namecache *ncp); @@ -228,12 +234,6 @@ SYSCTL_INT(_vfs_cache, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, static int numcache; SYSCTL_INT(_vfs_cache, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "Number of namecaches entries"); -static u_long numcalls; -SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcalls, CTLFLAG_RD, &numcalls, 0, - "Number of namecache lookups"); -static u_long numchecks; -SYSCTL_ULONG(_vfs_cache, OID_AUTO, numchecks, CTLFLAG_RD, &numchecks, 0, - "Number of checked entries in namecache lookups"); struct nchstats nchstats[SMP_MAXCPU]; /* @@ -265,6 +265,100 @@ SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD, static struct namecache *cache_zap(struct namecache *ncp, int nonblock); /* + * Cache mount points and namecache records in order to avoid unnecessary + * atomic ops on mnt_refs and ncp->refs. This improves concurrent SMP + * performance and is particularly important on multi-socket systems to + * reduce cache-line ping-ponging. + * + * Try to keep the pcpu structure within one cache line (~64 bytes). 
+ */ +#define MNTCACHE_COUNT 5 + +struct mntcache { + struct mount *mntary[MNTCACHE_COUNT]; + struct namecache *ncp1; + struct namecache *ncp2; + int iter; + int unused01; +} __cachealign; + +static struct mntcache pcpu_mntcache[MAXCPU]; + +static +void +_cache_mntref(struct mount *mp) +{ + struct mntcache *cache = &pcpu_mntcache[mycpu->gd_cpuid]; + int i; + + for (i = 0; i < MNTCACHE_COUNT; ++i) { + if (cache->mntary[i] != mp) + continue; + if (atomic_cmpset_ptr((void *)&cache->mntary[i], mp, NULL)) + return; + } + atomic_add_int(&mp->mnt_refs, 1); +} + +static +void +_cache_mntrel(struct mount *mp) +{ + struct mntcache *cache = &pcpu_mntcache[mycpu->gd_cpuid]; + int i; + + for (i = 0; i < MNTCACHE_COUNT; ++i) { + if (cache->mntary[i] == NULL) { + mp = atomic_swap_ptr((void *)&cache->mntary[i], mp); + if (mp == NULL) + return; + } + } + i = (int)((uint32_t)++cache->iter % (uint32_t)MNTCACHE_COUNT); + mp = atomic_swap_ptr((void *)&cache->mntary[i], mp); + if (mp) + atomic_add_int(&mp->mnt_refs, -1); +} + +/* + * Clears all cached mount points on all cpus. This routine should only + * be called when we are waiting for a mount to clear, e.g. so we can + * unmount. + */ +void +cache_clearmntcache(void) +{ + int n; + + for (n = 0; n < ncpus; ++n) { + struct mntcache *cache = &pcpu_mntcache[n]; + struct namecache *ncp; + struct mount *mp; + int i; + + for (i = 0; i < MNTCACHE_COUNT; ++i) { + if (cache->mntary[i]) { + mp = atomic_swap_ptr( + (void *)&cache->mntary[i], NULL); + if (mp) + atomic_add_int(&mp->mnt_refs, -1); + } + } + if (cache->ncp1) { + ncp = atomic_swap_ptr((void *)&cache->ncp1, NULL); + if (ncp) + _cache_drop(ncp); + } + if (cache->ncp2) { + ncp = atomic_swap_ptr((void *)&cache->ncp2, NULL); + if (ncp) + _cache_drop(ncp); + } + } +} + + +/* * Namespace locking. The caller must already hold a reference to the * namecache structure in order to lock/unlock it. 
This function prevents * the namespace from being created or destroyed by accessors other then @@ -903,52 +997,91 @@ cache_zero(struct nchandle *nch) * * WARNING: Caller may hold an unrelated read spinlock, which means we can't * use read spinlocks here. - * - * MPSAFE if nch is */ struct nchandle * cache_hold(struct nchandle *nch) { _cache_hold(nch->ncp); - atomic_add_int(&nch->mount->mnt_refs, 1); + _cache_mntref(nch->mount); return(nch); } /* * Create a copy of a namecache handle for an already-referenced * entry. - * - * MPSAFE if nch is */ void cache_copy(struct nchandle *nch, struct nchandle *target) { + struct mntcache *cache = &pcpu_mntcache[mycpu->gd_cpuid]; + struct namecache *ncp; + *target = *nch; - if (target->ncp) - _cache_hold(target->ncp); - atomic_add_int(&nch->mount->mnt_refs, 1); + _cache_mntref(target->mount); + ncp = target->ncp; + if (ncp) { + if (ncp == cache->ncp1) { + if (atomic_cmpset_ptr((void *)&cache->ncp1, ncp, NULL)) + return; + } + if (ncp == cache->ncp2) { + if (atomic_cmpset_ptr((void *)&cache->ncp2, ncp, NULL)) + return; + } + _cache_hold(ncp); + } } -/* - * MPSAFE if nch is - */ void cache_changemount(struct nchandle *nch, struct mount *mp) { - atomic_add_int(&nch->mount->mnt_refs, -1); + _cache_mntref(mp); + _cache_mntrel(nch->mount); nch->mount = mp; - atomic_add_int(&nch->mount->mnt_refs, 1); } void cache_drop(struct nchandle *nch) { - atomic_add_int(&nch->mount->mnt_refs, -1); + _cache_mntrel(nch->mount); _cache_drop(nch->ncp); nch->ncp = NULL; nch->mount = NULL; } +/* + * Drop the nchandle, but try to cache the ref to avoid global atomic + * ops. This is typically done on the system root and jail root nchandles. 
+ */ +void +cache_drop_and_cache(struct nchandle *nch) +{ + struct mntcache *cache = &pcpu_mntcache[mycpu->gd_cpuid]; + struct namecache *ncp; + + _cache_mntrel(nch->mount); + ncp = nch->ncp; + if (cache->ncp1 == NULL) { + ncp = atomic_swap_ptr((void *)&cache->ncp1, ncp); + if (ncp == NULL) + goto done; + } + if (cache->ncp2 == NULL) { + ncp = atomic_swap_ptr((void *)&cache->ncp2, ncp); + if (ncp == NULL) + goto done; + } + if (++cache->iter & 1) + ncp = atomic_swap_ptr((void *)&cache->ncp2, ncp); + else + ncp = atomic_swap_ptr((void *)&cache->ncp1, ncp); + if (ncp) + _cache_drop(ncp); +done: + nch->ncp = NULL; + nch->mount = NULL; +} + int cache_lockstatus(struct nchandle *nch) { @@ -1171,7 +1304,7 @@ cache_get(struct nchandle *nch, struct nchandle *target) KKASSERT(nch->ncp->nc_refs > 0); target->mount = nch->mount; target->ncp = _cache_get(nch->ncp); - atomic_add_int(&target->mount->mnt_refs, 1); + _cache_mntref(target->mount); } void @@ -1180,7 +1313,7 @@ cache_get_maybe_shared(struct nchandle *nch, struct nchandle *target, int excl) KKASSERT(nch->ncp->nc_refs > 0); target->mount = nch->mount; target->ncp = _cache_get_maybe_shared(nch->ncp, excl); - atomic_add_int(&target->mount->mnt_refs, 1); + _cache_mntref(target->mount); } /* @@ -1200,7 +1333,7 @@ _cache_put(struct namecache *ncp) void cache_put(struct nchandle *nch) { - atomic_add_int(&nch->mount->mnt_refs, -1); + _cache_mntrel(nch->mount); _cache_put(nch->ncp); nch->ncp = NULL; nch->mount = NULL; @@ -2755,7 +2888,6 @@ cache_nlookup(struct nchandle *par_nch, struct nlcomponent *nlc) globaldata_t gd; int par_locked; - numcalls++; gd = mycpu; mp = par_nch->mount; par_locked = 0; @@ -2780,8 +2912,6 @@ restart: spin_lock_shared(&nchpp->spin); LIST_FOREACH(ncp, &nchpp->list, nc_hash) { - numchecks++; - /* * Break out if we find a matching entry. 
Note that * UNRESOLVED entries may match, but DESTROYED entries @@ -2881,7 +3011,8 @@ found: ++gd->gd_nchstats->ncs_neghits; nch.mount = mp; nch.ncp = ncp; - atomic_add_int(&nch.mount->mnt_refs, 1); + _cache_mntref(nch.mount); + return(nch); } @@ -2906,7 +3037,6 @@ cache_nlookup_maybe_shared(struct nchandle *par_nch, struct nlcomponent *nlc, if (ncp_shared_lock_disable || excl) return(EWOULDBLOCK); - numcalls++; gd = mycpu; mp = par_nch->mount; @@ -2926,8 +3056,6 @@ cache_nlookup_maybe_shared(struct nchandle *par_nch, struct nlcomponent *nlc, spin_lock_shared(&nchpp->spin); LIST_FOREACH(ncp, &nchpp->list, nc_hash) { - numchecks++; - /* * Break out if we find a matching entry. Note that * UNRESOLVED entries may match, but DESTROYED entries @@ -2973,7 +3101,7 @@ found: res_nch->mount = mp; res_nch->ncp = ncp; ++gd->gd_nchstats->ncs_goodhits; - atomic_add_int(&res_nch->mount->mnt_refs, 1); + _cache_mntref(res_nch->mount); KKASSERT(ncp->nc_error != EWOULDBLOCK); return(ncp->nc_error); @@ -2996,7 +3124,6 @@ cache_nlookup_nonblock(struct nchandle *par_nch, struct nlcomponent *nlc) globaldata_t gd; int par_locked; - numcalls++; gd = mycpu; mp = par_nch->mount; par_locked = 0; @@ -3011,8 +3138,6 @@ cache_nlookup_nonblock(struct nchandle *par_nch, struct nlcomponent *nlc) restart: spin_lock(&nchpp->spin); LIST_FOREACH(ncp, &nchpp->list, nc_hash) { - numchecks++; - /* * Break out if we find a matching entry. 
Note that * UNRESOLVED entries may match, but DESTROYED entries @@ -3107,7 +3232,8 @@ found: ++gd->gd_nchstats->ncs_neghits; nch.mount = mp; nch.ncp = ncp; - atomic_add_int(&nch.mount->mnt_refs, 1); + _cache_mntref(nch.mount); + return(nch); failed: if (new_ncp) { @@ -3163,7 +3289,7 @@ cache_findmount_callback(struct mount *mp, void *data) mp->mnt_ncmounton.ncp == info->nch_ncp ) { info->result = mp; - atomic_add_int(&mp->mnt_refs, 1); + _cache_mntref(mp); return(-1); } return(0); @@ -3193,7 +3319,7 @@ cache_findmount(struct nchandle *nch) /* * Cache hit (positive) */ - atomic_add_int(&mp->mnt_refs, 1); + _cache_mntref(mp); spin_unlock_shared(&ncc->spin); ++ncmount_cache_hit; return(mp); @@ -3241,7 +3367,7 @@ skip: spin_lock(&ncc->spin); if (info.result == NULL) { if (ncc->isneg == 0 && ncc->mp) - atomic_add_int(&ncc->mp->mnt_refs, -1); + _cache_mntrel(ncc->mp); ncc->ncp = nch->ncp; ncc->mp = nch->mount; ncc->isneg = 1; @@ -3249,8 +3375,8 @@ skip: ++ncmount_cache_overwrite; } else if ((info.result->mnt_kern_flag & MNTK_UNMOUNT) == 0) { if (ncc->isneg == 0 && ncc->mp) - atomic_add_int(&ncc->mp->mnt_refs, -1); - atomic_add_int(&info.result->mnt_refs, 1); + _cache_mntrel(ncc->mp); + _cache_mntref(info.result); ncc->ncp = nch->ncp; ncc->mp = info.result; ncc->isneg = 0; @@ -3267,7 +3393,7 @@ skip: void cache_dropmount(struct mount *mp) { - atomic_add_int(&mp->mnt_refs, -1); + _cache_mntrel(mp); } void @@ -3301,7 +3427,7 @@ cache_unmounting(struct mount *mp) spin_lock(&ncc->spin); if (ncc->isneg == 0 && ncc->ncp == nch->ncp && ncc->mp == mp) { - atomic_add_int(&mp->mnt_refs, -1); + _cache_mntrel(mp); ncc->ncp = NULL; ncc->mp = NULL; } @@ -3725,7 +3851,7 @@ cache_allocroot(struct nchandle *nch, struct mount *mp, struct vnode *vp) { nch->ncp = cache_alloc(0); nch->mount = mp; - atomic_add_int(&mp->mnt_refs, 1); + _cache_mntref(mp); if (vp) _cache_setvp(nch->mount, nch->ncp, vp); } diff --git a/sys/kern/vfs_lock.c b/sys/kern/vfs_lock.c index 0ae048d4bf..10df38069d 100644 
--- a/sys/kern/vfs_lock.c +++ b/sys/kern/vfs_lock.c @@ -280,6 +280,28 @@ vref(struct vnode *vp) } /* + * Count number of cached vnodes. This is middling expensive so be + * careful not to make this call in the critical path, particularly + * not updating the global. Each cpu tracks its own accumulator. + * The individual accumulators are not accurate and must be summed + * together. + */ +int +countcachedvnodes(int gupdate) +{ + int i; + int n = 0; + + for (i = 0; i < ncpus; ++i) { + globaldata_t gd = globaldata_find(i); + n += gd->gd_cachedvnodes; + } + if (gupdate) + cachedvnodes = n; + return n; +} + +/* * Release a ref on an active or inactive vnode. * * Caller has no other requirements. @@ -331,7 +353,7 @@ vrele(struct vnode *vp) vx_unlock(vp); } else { if (atomic_cmpset_int(&vp->v_refcnt, count, 0)) { - atomic_add_int(&cachedvnodes, 1); + atomic_add_int(&mycpu->gd_cachedvnodes, 1); break; } } @@ -469,7 +491,7 @@ vget(struct vnode *vp, int flags) * not protect our access to the refcnt or other fields. */ if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) - atomic_add_int(&cachedvnodes, -1); + atomic_add_int(&mycpu->gd_cachedvnodes, -1); if ((error = vn_lock(vp, flags | LK_FAILRECLAIM)) != 0) { /* @@ -593,7 +615,7 @@ void vx_get(struct vnode *vp) { if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) - atomic_add_int(&cachedvnodes, -1); + atomic_add_int(&mycpu->gd_cachedvnodes, -1); lockmgr(&vp->v_lock, LK_EXCLUSIVE); } @@ -607,7 +629,7 @@ vx_get_nonblock(struct vnode *vp) error = lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT); if (error == 0) { if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) - atomic_add_int(&cachedvnodes, -1); + atomic_add_int(&mycpu->gd_cachedvnodes, -1); } return(error); } @@ -649,7 +671,7 @@ cleanfreevnode(int maxcount) /* * Try to deactivate some vnodes cached on the active list. 
*/ - if (cachedvnodes < inactivevnodes) + if (countcachedvnodes(0) < inactivevnodes) goto skip; for (count = 0; count < maxcount * 2; count++) { @@ -698,7 +720,7 @@ cleanfreevnode(int maxcount) * Try to deactivate the vnode. */ if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) - atomic_add_int(&cachedvnodes, -1); + atomic_add_int(&mycpu->gd_cachedvnodes, -1); atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); spin_unlock(&vfs_spin); @@ -919,7 +941,7 @@ void allocvnode_gc(void) { if (numvnodes >= maxvnodes && - cachedvnodes + inactivevnodes >= maxvnodes * 5 / 10) { + countcachedvnodes(0) + inactivevnodes >= maxvnodes * 5 / 10) { freesomevnodes(batchfreevnodes); } } diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index a5d2b2bbf7..0bae734fb9 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -459,6 +459,8 @@ vnlru_proc(void) SHUTDOWN_PRI_FIRST); for (;;) { + int ncached; + kproc_suspend_loop(); /* @@ -468,12 +470,13 @@ vnlru_proc(void) * * (long) -> deal with 64 bit machines, intermediate overflow */ + ncached = countcachedvnodes(1); if (numvnodes >= maxvnodes * 9 / 10 && - cachedvnodes + inactivevnodes >= maxvnodes * 5 / 10) { + ncached + inactivevnodes >= maxvnodes * 5 / 10) { int count = numvnodes - maxvnodes * 9 / 10; - if (count > (cachedvnodes + inactivevnodes) / 100) - count = (cachedvnodes + inactivevnodes) / 100; + if (count > (ncached + inactivevnodes) / 100) + count = (ncached + inactivevnodes) / 100; if (count < 5) count = 5; freesomevnodes(count); @@ -490,8 +493,9 @@ vnlru_proc(void) * Nothing to do if most of our vnodes are already on * the free list. 
*/ + ncached = countcachedvnodes(1); if (numvnodes <= maxvnodes * 9 / 10 || - cachedvnodes + inactivevnodes <= maxvnodes * 5 / 10) { + ncached + inactivevnodes <= maxvnodes * 5 / 10) { tsleep(vnlruthread, 0, "vlruwt", hz); continue; } diff --git a/sys/kern/vfs_nlookup.c b/sys/kern/vfs_nlookup.c index 837ee26a7c..daf07a713d 100644 --- a/sys/kern/vfs_nlookup.c +++ b/sys/kern/vfs_nlookup.c @@ -117,12 +117,14 @@ nlookup_init(struct nlookupdata *nd, cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); if (p->p_fd->fd_njdir.ncp) cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); - nd->nl_cred = crhold(p->p_ucred); + nd->nl_cred = td->td_ucred; + nd->nl_flags |= NLC_BORROWCRED; } else { cache_copy(&rootnch, &nd->nl_nch); cache_copy(&nd->nl_nch, &nd->nl_rootnch); cache_copy(&nd->nl_nch, &nd->nl_jailnch); - nd->nl_cred = crhold(proc0.p_ucred); + nd->nl_cred = proc0.p_ucred; + nd->nl_flags |= NLC_BORROWCRED; } nd->nl_td = td; nd->nl_flags |= flags; @@ -271,6 +273,7 @@ nlookup_init_root(struct nlookupdata *nd, return(error); } +#if 0 /* * Set a different credential; this credential will be used by future * operations performed on nd.nl_open_vp and nlookupdata structure. @@ -282,10 +285,13 @@ nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) if (nd->nl_cred != cred) { cred = crhold(cred); - crfree(nd->nl_cred); + if ((nd->nl_flags & NLC_BORROWCRED) == 0) + crfree(nd->nl_cred); + nd->nl_flags &= ~NLC_BORROWCRED; nd->nl_cred = cred; } } +#endif /* * Cleanup a nlookupdata structure after we are through with it. 
This may @@ -305,16 +311,18 @@ nlookup_done(struct nlookupdata *nd) cache_drop(&nd->nl_nch); /* NULL's out the nch */ } if (nd->nl_rootnch.ncp) - cache_drop(&nd->nl_rootnch); + cache_drop_and_cache(&nd->nl_rootnch); if (nd->nl_jailnch.ncp) - cache_drop(&nd->nl_jailnch); + cache_drop_and_cache(&nd->nl_jailnch); if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { objcache_put(namei_oc, nd->nl_path); nd->nl_path = NULL; } if (nd->nl_cred) { - crfree(nd->nl_cred); + if ((nd->nl_flags & NLC_BORROWCRED) == 0) + crfree(nd->nl_cred); nd->nl_cred = NULL; + nd->nl_flags &= ~NLC_BORROWCRED; } if (nd->nl_open_vp) { if (nd->nl_flags & NLC_LOCKVP) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 3885f9add8..5dba7909b6 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -713,11 +713,13 @@ dounmount(struct mount *mp, int flags) cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); cache_unlock(&mp->mnt_ncmountpt); + cache_clearmntcache(); if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { allproc_scan(&unmount_allproc_cb, mp); } + cache_clearmntcache(); if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { @@ -761,9 +763,12 @@ dounmount(struct mount *mp, int flags) * Scans can get temporary refs on a mountpoint (thought really * heavy duty stuff like cache_findmount() do not). */ + if (mp->mnt_refs != 1) + cache_clearmntcache(); for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) { cache_unmounting(mp); tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); + cache_clearmntcache(); } if (mp->mnt_refs != 1) { if ((flags & MNT_FORCE) == 0) { @@ -860,10 +865,13 @@ dounmount(struct mount *mp, int flags) * to busy the mount after we decided to do the unmount. 
*/ if (freeok) { + if (mp->mnt_refs > 1) + cache_clearmntcache(); while (mp->mnt_refs > 1) { cache_unmounting(mp); wakeup(mp); tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); + cache_clearmntcache(); } lwkt_reltoken(&mp->mnt_token); mount_drop(mp); diff --git a/sys/sys/globaldata.h b/sys/sys/globaldata.h index 091cbbb730..ce6ff18f8a 100644 --- a/sys/sys/globaldata.h +++ b/sys/sys/globaldata.h @@ -168,7 +168,8 @@ struct globaldata { int gd_timer_running; u_int gd_idle_repeat; /* repeated switches to idle */ int gd_quick_color; /* page-coloring helper */ - int gd_ireserved[6]; + int gd_cachedvnodes; /* accum across all cpus */ + int gd_ireserved[5]; const char *gd_infomsg; /* debugging */ struct lwkt_tokref gd_handoff; /* hand-off tokref */ void *gd_delayed_wakeup[2]; diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 0ded258491..8072c7f61a 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -240,6 +240,7 @@ struct mount { struct vop_ops *mnt_vn_fifo_ops; /* for use by the VFS */ struct nchandle mnt_ncmountpt; /* mount point */ struct nchandle mnt_ncmounton; /* mounted on */ + char mnt_pad[24]; /* (try to cache-align refs) */ int mnt_refs; /* nchandle references */ int mnt_hold; /* prevent kfree */ struct lwkt_token mnt_token; /* token lock if not MPSAFE */ diff --git a/sys/sys/namecache.h b/sys/sys/namecache.h index 9c4601d3c7..0890370bdc 100644 --- a/sys/sys/namecache.h +++ b/sys/sys/namecache.h @@ -176,6 +176,7 @@ struct componentname; struct nlcomponent; struct mount; +void cache_clearmntcache(void); void cache_lock(struct nchandle *nch); void cache_lock_maybe_shared(struct nchandle *nch, int excl); void cache_relock(struct nchandle *nch1, struct ucred *cred1, @@ -216,6 +217,7 @@ void cache_copy(struct nchandle *nch, struct nchandle *target); void cache_changemount(struct nchandle *nch, struct mount *mp); void cache_put(struct nchandle *nch); void cache_drop(struct nchandle *nch); +void cache_drop_and_cache(struct nchandle *nch); void cache_zero(struct 
nchandle *nch); void cache_rename(struct nchandle *fnch, struct nchandle *tnch); void cache_unlink(struct nchandle *nch); diff --git a/sys/sys/nchstats.h b/sys/sys/nchstats.h index d77d34aa50..36696ed2d1 100644 --- a/sys/sys/nchstats.h +++ b/sys/sys/nchstats.h @@ -39,6 +39,8 @@ /* * Statistics on the usefulness of namei caches. * (per-cpu) + * + * Allocated in an array so make sure this is cache-aligned. */ struct nchstats { unsigned long ncs_goodhits; /* hits that we can really use */ @@ -49,6 +51,6 @@ struct nchstats { unsigned long ncs_longhits; /* path lookup hits */ unsigned long ncs_longmiss; /* path lookup misses */ unsigned long ncs_unused; /* number of times we attempt it */ -}; +} __cachealign; #endif /* _SYS_NCHSTATS_H_ */ diff --git a/sys/sys/nlookup.h b/sys/sys/nlookup.h index e503d246f2..673e69855d 100644 --- a/sys/sys/nlookup.h +++ b/sys/sys/nlookup.h @@ -130,7 +130,7 @@ struct nlookupdata { #define NLC_EXEC 0x01000000 /* require execute access */ #define NLC_EXCL 0x02000000 /* open check: exclusive */ #define NLC_OWN 0x04000000 /* open check: owner override */ -#define NLC_UNUSED08000000 0x08000000 +#define NLC_BORROWCRED 0x08000000 /* cred ref borrowed */ #define NLC_STICKY 0x10000000 /* indicate sticky case */ #define NLC_APPENDONLY 0x20000000 /* indicate append-only */ #define NLC_IMMUTABLE 0x40000000 /* indicate immutable set */ @@ -151,7 +151,6 @@ int nlookup_init_at(struct nlookupdata *, struct file **, int, const char *, enum uio_seg, int); int nlookup_init_raw(struct nlookupdata *, const char *, enum uio_seg, int, struct ucred *, struct nchandle *); int nlookup_init_root(struct nlookupdata *, const char *, enum uio_seg, int, struct ucred *, struct nchandle *, struct nchandle *); -void nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred); void nlookup_zero(struct nlookupdata *); void nlookup_done(struct nlookupdata *); void nlookup_done_at(struct nlookupdata *, struct file *); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 
8696da91ad..4fddd2132f 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -356,7 +356,6 @@ extern struct vattr va_null; /* predefined null vattr structure */ extern int numvnodes; extern int inactivevnodes; extern int activevnodes; -extern int cachedvnodes; /* * This macro is very helpful in defining those offsets in the vdesc struct. @@ -565,6 +564,7 @@ void vfs_subr_init(void); void vfs_mount_init(void); void vfs_lock_init(void); void mount_init(struct mount *mp); +int countcachedvnodes(int gupdate); void vn_syncer_add(struct vnode *, int); void vn_syncer_remove(struct vnode *, int); -- 2.11.4.GIT