From 2247fe02f4e80c2f2acaa71e60bf6b98eb848dca Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Sun, 27 Dec 2009 22:36:07 -0800
Subject: [PATCH] kernel - fine-grained namecache and partial vnode MPSAFE work

Namecache subsystem

* All vnode->v_flag modifications now use vsetflags() and vclrflags().
  Because some flags are set and cleared by vhold()/vdrop(), which do not
  require any locks to be held, all modifications must use atomic ops.

* Clean up and revamp the namecache MPSAFE work.  Namecache operations
  now use a fine-grained MPSAFE locking model which loosely follows
  these rules:

  - Lock ordering is child to parent, e.g. lock file, then lock parent
    directory.  This allows resolver recursions up the parent directory
    chain.

  - Downward-traversing namecache invalidations and path lookups will
    unlock the parent (but leave it referenced) before attempting to
    lock the child.

  - Namecache hash table lookups utilize a per-bucket spinlock.

  - Vnode locks may be acquired while holding namecache locks, but not
    vice versa.  Vnodes are not destroyed until all namecache references
    go away, but can enter reclamation.  Namecache lookups detect the
    case and re-resolve to overcome the race.  Namecache entries are not
    destroyed while referenced.

* Remove vfs_token; the namecache MPSAFE model is now totally
  fine-grained.

* Revamp the namecache locking primitives (cache_lock/cache_unlock and
  friends).  Use atomic ops and nc_exlocks instead of nc_locktd, and
  build in a request flag.  This solves busy/tsleep races between the
  lock holder and lock requester.

* Revamp namecache parent/child linkages.  Instead of using vfs_token
  to lock such operations we simply lock both child and parent
  namecache entries.  Hash table operations are also fully integrated
  with the parent/child linking operations.

* The vnode->v_namecache list is locked via vnode->v_spinlock, which is
  actually vnode->v_lock.lk_spinlock.

* Revamp cache_vref() and cache_vget().  The passed namecache entry
  must be referenced and locked.  Internals are simplified.

* Fix a deadlock by moving the call to _cache_hysteresis() to a place
  where the current thread otherwise does not hold any locked ncp's.

* Revamp nlookup() to follow the new namecache locking rules.

* Fix a number of places, e.g. in vfs/nfs/nfs_subs.c, where
  ncp->nc_parent or ncp->nc_vp was being accessed with an unlocked ncp.
  nc_parent and nc_vp accesses are only valid if the ncp is locked.

* Add the vfs.cache_mpsafe sysctl, which defaults to 0.  This may be
  set to 1 to enable MPSAFE namecache operations for the [l,f]stat()
  and open() system calls (for the moment).

VFS/VNODE subsystem

* Use a global spinlock, vfs_spin (for now), to manage vnode_free_list.
  Use vnode->v_spinlock (and vfs_spin) to manage vhold/vdrop ops and to
  interlock v_auxrefs tests against vnode terminations.

* Integrate per-mount mnt_token and (for now) the MP lock into VOP_*()
  and VFS_*() operations.  This allows the MP lock to be shifted
  further inward from the system calls, but we don't do it quite yet.

* HAMMER: VOP_GETATTR, VOP_READ, and VOP_INACTIVE are now MPSAFE.  The
  corresponding sysctls have been removed.

* FIFOFS: Needed some MPSAFE work in order to allow HAMMER to make
  things MPSAFE above, since HAMMER forwards vops for in-filesystem
  fifos to fifofs.

* Add some debugging kprintf()s when certain MP races are averted, for
  testing only.

MISC

* Add some assertions to the VM system.

* Document existing and newly MPSAFE code.
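As a rough illustration of the nc_exlocks protocol described above, here is
a minimal userland sketch.  The names (xlock, EXLOCK_REQ) are invented
stand-ins for the namecache fields, and sched_yield() stands in for the
kernel's tsleep_interlock()/wakeup() pair; this is a sketch of the idea,
not the kernel code itself.  The point is that the recursion count and the
contention flag live in one word, so a waiter latches the flag with a
cmpset on that word: either the releaser sees the flag, or the cmpset
fails and the waiter retries.  That is what closes the busy/tsleep race.

#include <stdatomic.h>
#include <stddef.h>
#include <sched.h>

#define EXLOCK_REQ  0x80000000u         /* mirrors NC_EXLOCK_REQ */

struct xlock {
        atomic_uint     count;          /* lock count plus request flag */
        void            *owner;         /* owning thread, set after the fact */
};

static void
xlock_lock(struct xlock *lk, void *td)
{
        unsigned int n;

        for (;;) {
                n = atomic_load(&lk->count);
                if (n == 0) {
                        /* 0 -> 1 acquires; owner is recorded afterwards */
                        if (atomic_compare_exchange_strong(&lk->count, &n, 1)) {
                                lk->owner = td;
                                return;
                        }
                } else if (lk->owner == td) {
                        /* recursive acquisition by the current owner */
                        if (atomic_compare_exchange_strong(&lk->count, &n,
                                                           n + 1))
                                return;
                } else {
                        /*
                         * Latch the request flag into the same word with a
                         * cmpset, then wait.  Userland stand-in: yield and
                         * re-test (the kernel sleeps and is woken).
                         */
                        if (atomic_compare_exchange_strong(&lk->count, &n,
                                                           n | EXLOCK_REQ))
                                sched_yield();
                }
        }
}

static void
xlock_unlock(struct xlock *lk)
{
        unsigned int n = atomic_load(&lk->count);

        for (;;) {
                if ((n & ~EXLOCK_REQ) == 1) {
                        /* final release: clear owner before the count drops */
                        lk->owner = NULL;
                        if (atomic_compare_exchange_strong(&lk->count, &n, 0)) {
                                /* kernel: if (n & EXLOCK_REQ) wakeup(ncp); */
                                return;
                        }
                } else {
                        if (atomic_compare_exchange_strong(&lk->count, &n,
                                                           n - 1))
                                return;
                }
        }
}

As in the kernel code, the owner field is only ever compared against the
current thread by that thread itself, so it can be read unlocked.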
--- .../linux/i386/linprocfs/linprocfs_subr.c | 2 +- sys/emulation/linux/linux_misc.c | 2 +- sys/kern/imgact_aout.c | 2 +- sys/kern/imgact_elf.c | 2 +- sys/kern/kern_checkpoint.c | 2 +- sys/kern/kern_iosched.c | 9 + sys/kern/kern_lockf.c | 4 +- sys/kern/vfs_cache.c | 802 +++++++++++++-------- sys/kern/vfs_conf.c | 6 +- sys/kern/vfs_lock.c | 152 +++- sys/kern/vfs_mount.c | 2 + sys/kern/vfs_nlookup.c | 84 ++- sys/kern/vfs_subr.c | 28 +- sys/kern/vfs_sync.c | 4 +- sys/kern/vfs_syscalls.c | 66 +- sys/kern/vfs_vfsops.c | 49 +- sys/kern/vfs_vnops.c | 58 +- sys/kern/vfs_vopops.c | 131 ++-- sys/platform/pc32/i386/pmap.c | 6 +- sys/sys/mount.h | 42 +- sys/sys/namecache.h | 63 +- sys/vfs/devfs/devfs_core.c | 3 +- sys/vfs/devfs/devfs_vnops.c | 2 +- sys/vfs/fdesc/fdesc_vfsops.c | 2 +- sys/vfs/fifofs/fifo_vnops.c | 2 +- sys/vfs/gnu/ext2fs/ext2_quota.c | 4 +- sys/vfs/hammer/hammer.h | 1 + sys/vfs/hammer/hammer_inode.c | 12 +- sys/vfs/hammer/hammer_vfsops.c | 3 +- sys/vfs/hpfs/hpfs_vfsops.c | 2 +- sys/vfs/isofs/cd9660/cd9660_vfsops.c | 2 +- sys/vfs/msdosfs/msdosfs_denode.c | 2 +- sys/vfs/nfs/nfs_subs.c | 3 + sys/vfs/nfs/nfs_vfsops.c | 4 +- sys/vfs/ntfs/ntfs_vfsops.c | 4 +- sys/vfs/nwfs/nwfs_vfsops.c | 2 +- sys/vfs/portal/portal_vfsops.c | 2 +- sys/vfs/smbfs/smbfs_vfsops.c | 2 +- sys/vfs/udf/udf_vfsops.c | 2 +- sys/vfs/ufs/ufs_quota.c | 4 +- sys/vfs/ufs/ufs_vnops.c | 2 +- sys/vfs/union/union_subr.c | 2 +- sys/vm/vm_object.c | 2 +- sys/vm/vnode_pager.c | 10 +- 44 files changed, 1007 insertions(+), 583 deletions(-) diff --git a/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c b/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c index 8cda14030d..2058332f95 100644 --- a/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c +++ b/sys/emulation/linux/i386/linprocfs/linprocfs_subr.c @@ -151,7 +151,7 @@ loop: (VREAD|VEXEC) >> 3 | (VREAD|VEXEC) >> 6; vp->v_type = VDIR; - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); break; case Pself: /* /proc/self = lr--r--r-- */ diff --git a/sys/emulation/linux/linux_misc.c b/sys/emulation/linux/linux_misc.c index dd09b7b051..d282a65edf 100644 --- a/sys/emulation/linux/linux_misc.c +++ b/sys/emulation/linux/linux_misc.c @@ -383,7 +383,7 @@ sys_linux_uselib(struct linux_uselib_args *args) } /* prevent more writers */ - vp->v_flag |= VTEXT; + vsetflags(vp, VTEXT); /* * Check if file_offset page aligned. Currently we cannot handle diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 95508f941c..49727fb144 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -239,7 +239,7 @@ exec_aout_imgact(struct image_params *imgp) imgp->proc->p_sysent = &aout_sysvec; /* Indicate that this file should not be modified */ - imgp->vp->v_flag |= VTEXT; + vsetflags(imgp->vp, VTEXT); return (0); } diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 8f0f536279..f20c64da2a 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -528,7 +528,7 @@ elf_load_file(struct proc *p, const char *file, u_long *addr, u_long *entry) * its VTEXT flag, too. 
 */
    if (error == 0)
-        imgp->vp->v_flag |= VTEXT;
+        vsetflags(imgp->vp, VTEXT);
    vn_unlock(imgp->vp);
    if (error)
        goto fail;
diff --git a/sys/kern/kern_checkpoint.c b/sys/kern/kern_checkpoint.c
index 76e51f3c8f..8bda8322b5 100644
--- a/sys/kern/kern_checkpoint.c
+++ b/sys/kern/kern_checkpoint.c
@@ -271,7 +271,7 @@ ckpt_thaw_proc(struct lwp *lp, struct file *fp)
    if (p->p_textvp)
        vrele(p->p_textvp);
    p->p_textvp = (struct vnode *)fp->f_data;
-    p->p_textvp->v_flag |= VCKPT;
+    vsetflags(p->p_textvp, VCKPT);
    vref(p->p_textvp);
}
done:
diff --git a/sys/kern/kern_iosched.c b/sys/kern/kern_iosched.c
index 310130eed7..9a9a40e9ce 100644
--- a/sys/kern/kern_iosched.c
+++ b/sys/kern/kern_iosched.c
@@ -62,6 +62,9 @@ SYSCTL_INT(_iosched, OID_AUTO, debug, CTLFLAG_RW, &iosched_debug, 0, "");
static struct iosched_data ioscpu[SMP_MAXCPU];
+/*
+ * MPSAFE
+ */
static int
badjiosched(thread_t td, size_t bytes)
{
@@ -116,6 +119,8 @@ biosched_done(thread_t td)
/*
 * Caller intends to write (bytes)
+ *
+ * MPSAFE
 */
void
bwillwrite(int bytes)
@@ -133,6 +138,8 @@ bwillwrite(int bytes)
/*
 * Caller intends to read (bytes)
+ *
+ * MPSAFE
 */
void
bwillread(int bytes)
@@ -141,6 +148,8 @@ bwillread(int bytes)
/*
 * Call intends to do an inode-modifying operation of some sort.
+ *
+ * MPSAFE
 */
void
bwillinode(int n)
diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c
index 597d0a6241..043c5b7ff2 100644
--- a/sys/kern/kern_lockf.c
+++ b/sys/kern/kern_lockf.c
@@ -254,14 +254,14 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf *lock, u_quad_t size)
     * then before.
     */
    error = lf_setlock(lock, owner, type, flags, start, end);
-    ap->a_vp->v_flag |= VMAYHAVELOCKS;
+    vsetflags(ap->a_vp, VMAYHAVELOCKS);
    break;
case F_UNLCK:
    error = lf_setlock(lock, owner, type, flags, start, end);
    if (TAILQ_EMPTY(&lock->lf_range) &&
        TAILQ_EMPTY(&lock->lf_blocked)) {
-        ap->a_vp->v_flag &= ~VMAYHAVELOCKS;
+        vclrflags(ap->a_vp, VMAYHAVELOCKS);
    }
    break;
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 0458c1230b..c7491830ac 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
+ * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon
@@ -64,10 +64,6 @@
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
- *
- * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
- * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
- * $DragonFly: src/sys/kern/vfs_cache.c,v 1.91 2008/06/14 05:34:06 dillon Exp $
 */
#include
@@ -97,16 +93,31 @@
/*
 * Random lookups in the cache are accomplished with a hash table using
- * a hash key of (nc_src_vp, name).
+ * a hash key of (nc_src_vp, name). Each hash chain has its own spin lock.
+ *
+ * Negative entries may exist and correspond to resolved namecache
+ * structures where nc_vp is NULL. In a negative entry, NCF_WHITEOUT
+ * will be set if the entry corresponds to a whited-out directory entry
+ * (versus simply not finding the entry at all). ncneglist is locked
+ * with a global spinlock (ncspin).
+ *
+ * MPSAFE RULES:
+ *
+ * (1) A ncp must be referenced before it can be locked.
+ *
+ * (2) A ncp must be locked in order to modify it.
+ *
+ * (3) ncp locks are always ordered child -> parent.
That may seem + * backwards but forward scans use the hash table and thus can hold + * the parent unlocked when traversing downward. * - * Negative entries may exist and correspond to structures where nc_vp - * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry - * corresponds to a whited-out directory entry (verses simply not finding the - * entry at all). + * This allows insert/rename/delete/dot-dot and other operations + * to use ncp->nc_parent links. * - * Upon reaching the last segment of a path, if the reference is for DELETE, - * or NOCACHE is set (rewrite), and the name is located in the cache, it - * will be dropped. + * This also prevents a locked up e.g. NFS node from creating a + * chain reaction all the way back to the root vnode / namecache. + * + * (4) parent linkages require both the parent and child to be locked. */ /* @@ -127,7 +138,6 @@ struct nchash_head { static struct nchash_head *nchashtbl; static struct namecache_list ncneglist; static struct spinlock ncspin; -struct lwkt_token vfs_token; /* * ncvp_debug - debug cache_fromvp(). This is used by the NFS server @@ -163,9 +173,11 @@ SYSCTL_INT(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, ""); SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); +int cache_mpsafe; +SYSCTL_INT(_vfs, OID_AUTO, cache_mpsafe, CTLFLAG_RW, &cache_mpsafe, 0, ""); + static int cache_resolve_mp(struct mount *mp); static struct vnode *cache_dvpref(struct namecache *ncp); -static void _cache_rehash(struct namecache *ncp); static void _cache_lock(struct namecache *ncp); static void _cache_setunresolved(struct namecache *ncp); @@ -231,6 +243,9 @@ static struct namecache *cache_zap(struct namecache *ncp); * The lock owner has full authority to associate/disassociate vnodes * and resolve/unresolve the locked ncp. * + * The primary lock field is nc_exlocks. nc_locktd is set after the + * fact (when locking) or cleared prior to unlocking. + * * WARNING! Holding a locked ncp will prevent a vnode from being destroyed * or recycled, but it does NOT help you if the vnode had already * initiated a recyclement. If this is important, use cache_get() @@ -238,32 +253,27 @@ static struct namecache *cache_zap(struct namecache *ncp); * way the refs counter is handled). Or, alternatively, make an * unconditional call to cache_validate() or cache_resolve() * after cache_lock() returns. + * + * MPSAFE */ static void _cache_lock(struct namecache *ncp) { thread_t td; - thread_t xtd; int didwarn; int error; + u_int count; KKASSERT(ncp->nc_refs != 0); didwarn = 0; td = curthread; for (;;) { - xtd = ncp->nc_locktd; - - if (xtd == td) { - ++ncp->nc_exlocks; - break; - } - if (xtd == NULL) { - if (atomic_cmpset_ptr(&ncp->nc_locktd, NULL, td)) { - KKASSERT(ncp->nc_exlocks == 0); - ncp->nc_exlocks = 1; + count = ncp->nc_exlocks; + if (count == 0) { + if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) { /* * The vp associated with a locked ncp must * be held to prevent it from being recycled. @@ -274,61 +284,71 @@ _cache_lock(struct namecache *ncp) * cache_vget() on the locked ncp to * validate the vp or set the cache entry * to unresolved. + * + * NOTE! vhold() is allowed if we hold a + * lock on the ncp (which we do). 
*/ + ncp->nc_locktd = td; if (ncp->nc_vp) vhold(ncp->nc_vp); /* MPSAFE */ break; } + /* cmpset failed */ + continue; + } + if (ncp->nc_locktd == td) { + if (atomic_cmpset_int(&ncp->nc_exlocks, count, + count + 1)) { + break; + } + /* cmpset failed */ continue; } - - /* - * Memory interlock (XXX) - */ - ncp->nc_lockreq = 1; tsleep_interlock(ncp, 0); - cpu_mfence(); - if (xtd != ncp->nc_locktd) + if (atomic_cmpset_int(&ncp->nc_exlocks, count, + count | NC_EXLOCK_REQ) == 0) { + /* cmpset failed */ continue; + } error = tsleep(ncp, PINTERLOCKED, "clock", nclockwarn); if (error == EWOULDBLOCK) { - if (didwarn) - continue; - didwarn = 1; - kprintf("[diagnostic] cache_lock: blocked on %p", ncp); - kprintf(" \"%*.*s\"\n", - ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name); + if (didwarn == 0) { + didwarn = ticks; + kprintf("[diagnostic] cache_lock: blocked " + "on %p", + ncp); + kprintf(" \"%*.*s\"\n", + ncp->nc_nlen, ncp->nc_nlen, + ncp->nc_name); + } } } - - if (didwarn == 1) { - kprintf("[diagnostic] cache_lock: unblocked %*.*s\n", - ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name); + if (didwarn) { + kprintf("[diagnostic] cache_lock: unblocked %*.*s after " + "%d secs\n", + ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name, + (int)(ticks - didwarn) / hz); } } +/* + * MPSAFE + */ static int _cache_lock_nonblock(struct namecache *ncp) { thread_t td; - thread_t xtd; + u_int count; KKASSERT(ncp->nc_refs != 0); td = curthread; for (;;) { - xtd = ncp->nc_locktd; - - if (xtd == td) { - ++ncp->nc_exlocks; - break; - } - if (xtd == NULL) { - if (atomic_cmpset_ptr(&ncp->nc_locktd, NULL, td)) { - KKASSERT(ncp->nc_exlocks == 0); - ncp->nc_exlocks = 1; + count = ncp->nc_exlocks; + if (count == 0) { + if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) { /* * The vp associated with a locked ncp must * be held to prevent it from being recycled. @@ -339,11 +359,24 @@ _cache_lock_nonblock(struct namecache *ncp) * cache_vget() on the locked ncp to * validate the vp or set the cache entry * to unresolved. + * + * NOTE! vhold() is allowed if we hold a + * lock on the ncp (which we do). */ + ncp->nc_locktd = td; if (ncp->nc_vp) vhold(ncp->nc_vp); /* MPSAFE */ break; } + /* cmpset failed */ + continue; + } + if (ncp->nc_locktd == td) { + if (atomic_cmpset_int(&ncp->nc_exlocks, count, + count + 1)) { + break; + } + /* cmpset failed */ continue; } return(EWOULDBLOCK); @@ -355,26 +388,42 @@ _cache_lock_nonblock(struct namecache *ncp) * Helper function * * NOTE: nc_refs can be 0 (degenerate case during _cache_drop). + * + * NOTE: nc_locktd must be NULLed out prior to nc_exlocks getting cleared. + * + * MPSAFE */ static void _cache_unlock(struct namecache *ncp) { thread_t td __debugvar = curthread; + u_int count; KKASSERT(ncp->nc_refs >= 0); KKASSERT(ncp->nc_exlocks > 0); KKASSERT(ncp->nc_locktd == td); - if (--ncp->nc_exlocks == 0) { + count = ncp->nc_exlocks; + if ((count & ~NC_EXLOCK_REQ) == 1) { + ncp->nc_locktd = NULL; if (ncp->nc_vp) vdrop(ncp->nc_vp); - ncp->nc_locktd = NULL; - cpu_mfence(); - if (ncp->nc_lockreq) { - ncp->nc_lockreq = 0; - wakeup(ncp); + } + for (;;) { + if ((count & ~NC_EXLOCK_REQ) == 1) { + if (atomic_cmpset_int(&ncp->nc_exlocks, count, 0)) { + if (count & NC_EXLOCK_REQ) + wakeup(ncp); + break; + } + } else { + if (atomic_cmpset_int(&ncp->nc_exlocks, count, + count - 1)) { + break; + } } + count = ncp->nc_exlocks; } } @@ -416,6 +465,8 @@ _cache_hold(struct namecache *ncp) * * NOTE: cache_zap() may return a non-NULL referenced parent which must * be dropped in a loop. 
+ *
+ * MPSAFE
 */
static __inline
void
@@ -444,20 +495,30 @@ _cache_drop(struct namecache *ncp)
        if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1))
            break;
        }
+        cpu_pause();
    }
}

/*
- * Link a new namecache entry to its parent. Be careful to avoid races
- * if vhold() blocks in the future.
+ * Link a new namecache entry to its parent and to the hash table. Be
+ * careful to avoid races if vhold() blocks in the future.
+ *
+ * Both ncp and par must be referenced and locked.
+ *
+ * NOTE: The hash table spinlock is likely held during this call, we
+ *       can't do anything fancy.
 *
- * MPSAFE - ncp must be locked and vfs_token must be held.
+ * MPSAFE
 */
static void
-_cache_link_parent(struct namecache *ncp, struct namecache *par)
+_cache_link_parent(struct namecache *ncp, struct namecache *par,
+                   struct nchash_head *nchpp)
{
    KKASSERT(ncp->nc_parent == NULL);
    ncp->nc_parent = par;
+    ncp->nc_head = nchpp;
+    LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
+
    if (TAILQ_EMPTY(&par->nc_list)) {
        TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
        /*
@@ -465,18 +526,21 @@ _cache_link_parent(struct namecache *ncp, struct namecache *par)
         * be held to prevent it from being recycled.
         */
        if (par->nc_vp)
-            vhold(par->nc_vp);    /* MPSAFE */
+            vhold(par->nc_vp);
    } else {
        TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
    }
}

/*
- * Remove the parent association from a namecache structure. If this is
- * the last child of the parent the cache_drop(par) will attempt to
- * recursively zap the parent.
+ * Remove the parent and hash associations from a namecache structure.
+ * If this is the last child of the parent the cache_drop(par) will
+ * attempt to recursively zap the parent.
+ *
+ * ncp must be locked. This routine will acquire a temporary lock on
+ * the parent as well as the appropriate hash chain.
 *
- * MPSAFE - ncp must be locked and vfs_token must be held.
+ * MPSAFE
 */
static void
_cache_unlink_parent(struct namecache *ncp)
@@ -485,12 +549,19 @@ _cache_unlink_parent(struct namecache *ncp)
    struct namecache *par;
    struct vnode *dropvp;
    if ((par = ncp->nc_parent) != NULL) {
-        ncp->nc_parent = NULL;
+        KKASSERT(ncp->nc_parent == par);
        _cache_hold(par);
+        _cache_lock(par);
+        spin_lock_wr(&ncp->nc_head->spin);
+        LIST_REMOVE(ncp, nc_hash);
        TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
        dropvp = NULL;
        if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
            dropvp = par->nc_vp;
+        spin_unlock_wr(&ncp->nc_head->spin);
+        ncp->nc_parent = NULL;
+        ncp->nc_head = NULL;
+        _cache_unlock(par);
        _cache_drop(par);
        /*
@@ -504,6 +575,8 @@
/*
 * Allocate a new namecache structure. Most of the code does not require
 * zero-termination of the string but it makes vop_compat_ncreate() easier.
+ *
+ * MPSAFE
 */
static struct namecache *
cache_alloc(int nlen)
{
@@ -526,6 +599,8 @@
/*
 * Can only be called for the case where the ncp has never been
 * associated with anything (so no spinlocks are needed).
+ *
+ * MPSAFE
 */
static void
_cache_free(struct namecache *ncp)
{
@@ -536,6 +611,9 @@
    kfree(ncp, M_VFSCACHE);
}
+/*
+ * MPSAFE
+ */
void
cache_zero(struct nchandle *nch)
{
@@ -546,8 +624,12 @@
/*
 * Ref and deref a namecache structure.
 *
- * Warning: caller may hold an unrelated read spinlock, which means we can't
- * use read spinlocks here.
+ * The caller must specify a stable ncp pointer, typically meaning the
+ * ncp is already referenced but this can also occur indirectly through
+ * e.g. holding a lock on a direct child.
+ * + * WARNING: Caller may hold an unrelated read spinlock, which means we can't + * use read spinlocks here. * * MPSAFE if nch is */ @@ -585,6 +667,9 @@ cache_changemount(struct nchandle *nch, struct mount *mp) atomic_add_int(&nch->mount->mnt_refs, 1); } +/* + * MPSAFE + */ void cache_drop(struct nchandle *nch) { @@ -594,12 +679,61 @@ cache_drop(struct nchandle *nch) nch->mount = NULL; } +/* + * MPSAFE + */ void cache_lock(struct nchandle *nch) { _cache_lock(nch->ncp); } +/* + * Relock nch1 given an unlocked nch1 and a locked nch2. The caller + * is responsible for checking both for validity on return as they + * may have become invalid. + * + * We have to deal with potential deadlocks here, just ping pong + * the lock until we get it (we will always block somewhere when + * looping so this is not cpu-intensive). + * + * which = 0 nch1 not locked, nch2 is locked + * which = 1 nch1 is locked, nch2 is not locked + */ +void +cache_relock(struct nchandle *nch1, struct ucred *cred1, + struct nchandle *nch2, struct ucred *cred2) +{ + int which; + + which = 0; + + for (;;) { + if (which == 0) { + if (cache_lock_nonblock(nch1) == 0) { + cache_resolve(nch1, cred1); + break; + } + cache_unlock(nch2); + cache_lock(nch1); + cache_resolve(nch1, cred1); + which = 1; + } else { + if (cache_lock_nonblock(nch2) == 0) { + cache_resolve(nch2, cred2); + break; + } + cache_unlock(nch1); + cache_lock(nch2); + cache_resolve(nch2, cred2); + which = 0; + } + } +} + +/* + * MPSAFE + */ int cache_lock_nonblock(struct nchandle *nch) { @@ -607,6 +741,9 @@ cache_lock_nonblock(struct nchandle *nch) } +/* + * MPSAFE + */ void cache_unlock(struct nchandle *nch) { @@ -622,6 +759,8 @@ cache_unlock(struct nchandle *nch) * * We want cache_get() to return a definitively usable vnode or a * definitively unresolved ncp. + * + * MPSAFE */ static struct namecache * @@ -635,21 +774,23 @@ _cache_get(struct namecache *ncp) } /* - * This is a special form of _cache_get() which only succeeds if + * This is a special form of _cache_lock() which only succeeds if * it can get a pristine, non-recursive lock. The caller must have * already ref'd the ncp. * * On success the ncp will be locked, on failure it will not. The * ref count does not change either way. * - * We want _cache_get_nonblock() (on success) to return a definitively + * We want _cache_lock_special() (on success) to return a definitively * usable vnode or a definitively unresolved ncp. + * + * MPSAFE */ static int -_cache_get_nonblock(struct namecache *ncp) +_cache_lock_special(struct namecache *ncp) { if (_cache_lock_nonblock(ncp) == 0) { - if (ncp->nc_exlocks == 1) { + if ((ncp->nc_exlocks & ~NC_EXLOCK_REQ) == 1) { if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) _cache_setunresolved(ncp); return(0); @@ -662,6 +803,8 @@ _cache_get_nonblock(struct namecache *ncp) /* * NOTE: The same nchandle can be passed for both arguments. 
+ *
+ * MPSAFE
 */
void
cache_get(struct nchandle *nch, struct nchandle *target)
@@ -672,18 +815,9 @@ cache_get(struct nchandle *nch, struct nchandle *target)
    atomic_add_int(&target->mount->mnt_refs, 1);
}
-#if 0
-int
-cache_get_nonblock(struct nchandle *nch)
-{
-    int error;
-
-    if ((error = _cache_get_nonblock(nch->ncp)) == 0)
-        atomic_add_int(&nch->mount->mnt_refs, 1);
-    return (error);
-}
-#endif
-
+/*
+ * MPSAFE
+ */
static __inline
void
_cache_put(struct namecache *ncp)
@@ -692,6 +826,9 @@
    _cache_drop(ncp);
}
+/*
+ * MPSAFE
+ */
void
cache_put(struct nchandle *nch)
{
@@ -706,12 +843,15 @@
 * vnode is NULL, a negative cache entry is created.
 *
 * The ncp should be locked on entry and will remain locked on return.
+ *
+ * MPSAFE
 */
static void
_cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
{
    KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
+
    if (vp != NULL) {
        /*
         * Any vp associated with an ncp which has children must
@@ -752,7 +892,6 @@
         */
        ncp->nc_vp = NULL;
        spin_lock_wr(&ncspin);
-        lwkt_token_init(&vfs_token);
        TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
        ++numneg;
        spin_unlock_wr(&ncspin);
@@ -763,12 +902,18 @@
    ncp->nc_flag &= ~NCF_UNRESOLVED;
}
+/*
+ * MPSAFE
+ */
void
cache_setvp(struct nchandle *nch, struct vnode *vp)
{
    _cache_setvp(nch->mount, nch->ncp, vp);
}
+/*
+ * MPSAFE
+ */
void
cache_settimeout(struct nchandle *nch, int nticks)
{
@@ -791,6 +936,8 @@
 * avoid complex namespace operations. This disconnects a directory vnode
 * from its namecache and can cause the OLDAPI and NEWAPI to get out of
 * sync.
+ *
+ * MPSAFE
 */
static
void
@@ -835,6 +982,8 @@ _cache_setunresolved(struct namecache *ncp)
 * set a resolved cache element to unresolved if it has timed out
 * or if it is a negative cache hit and the mount point namecache_gen
 * has changed.
+ *
+ * MPSAFE
 */
static __inline void
_cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
@@ -868,6 +1017,9 @@
}
+/*
+ * MPSAFE
+ */
void
cache_setunresolved(struct nchandle *nch)
{
@@ -879,6 +1031,8 @@
 * looking for matches. This flag tells the lookup code when it must
 * check for a mount linkage and also prevents the directories in question
 * from being deleted or renamed.
+ *
+ * MPSAFE
 */
static
int
@@ -893,6 +1047,9 @@ cache_clrmountpt_callback(struct mount *mp, void *data)
    return(0);
}
+/*
+ * MPSAFE
+ */
void
cache_clrmountpt(struct nchandle *nch)
{
@@ -908,7 +1065,10 @@
 * Invalidate portions of the namecache topology given a starting entry.
 * The passed ncp is set to an unresolved state and:
 *
- * The passed ncp must be locked.
+ * The passed ncp must be referenced and locked. The routine may unlock
+ * and relock ncp several times, and will recheck the children and loop
+ * to catch races. When done the passed ncp will be returned with the
+ * reference and lock intact.
 *
 * CINV_DESTROY  - Set a flag in the passed ncp entry indicating
 *                 that the physical underlying nodes have been
@@ -931,7 +1091,9 @@
 *                 cleaning out any unreferenced nodes in the topology
 *                 from the leaves up as the recursion backs out.
 *
- * Note that the topology for any referenced nodes remains intact.
+ * Note that the topology for any referenced nodes remains intact, but + * the nodes will be marked as having been destroyed and will be set + * to an unresolved state. * * It is possible for cache_inval() to race a cache_resolve(), meaning that * the namecache entry may not actually be invalidated on return if it was @@ -950,6 +1112,8 @@ cache_clrmountpt(struct nchandle *nch) * node using a depth-first algorithm in order to allow multiple deep * recursions to chain through each other, then we restart the invalidation * from scratch. + * + * MPSAFE */ struct cinvtrack { @@ -995,18 +1159,21 @@ cache_inval(struct nchandle *nch, int flags) return(_cache_inval(nch->ncp, flags)); } +/* + * Helper for _cache_inval(). The passed ncp is refd and locked and + * remains that way on return, but may be unlocked/relocked multiple + * times by the routine. + */ static int _cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track) { struct namecache *kid; struct namecache *nextkid; - lwkt_tokref nlock; int rcnt = 0; KKASSERT(ncp->nc_exlocks); _cache_setunresolved(ncp); - lwkt_gettoken(&nlock, &vfs_token); if (flags & CINV_DESTROY) ncp->nc_flag |= NCF_DESTROYED; if ((flags & CINV_CHILDREN) && @@ -1039,7 +1206,6 @@ _cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track) --track->depth; _cache_lock(ncp); } - lwkt_reltoken(&nlock); /* * Someone could have gotten in there while ncp was unlocked, @@ -1065,6 +1231,8 @@ _cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track) * * In addition, the v_namecache list itself must be locked via * the vnode's spinlock. + * + * MPSAFE */ int cache_inval_vp(struct vnode *vp, int flags) @@ -1094,13 +1262,14 @@ restart: _cache_inval(ncp, flags); _cache_put(ncp); /* also releases reference */ ncp = next; + spin_lock_wr(&vp->v_spinlock); if (ncp && ncp->nc_vp != vp) { + spin_unlock_wr(&vp->v_spinlock); kprintf("Warning: cache_inval_vp: race-B detected on " "%s\n", ncp->nc_name); _cache_drop(ncp); goto restart; } - spin_lock_wr(&vp->v_spinlock); } spin_unlock_wr(&vp->v_spinlock); return(TAILQ_FIRST(&vp->v_namecache) != NULL); @@ -1112,6 +1281,8 @@ restart: * * Return 0 on success, non-zero if not all namecache records could be * disassociated from the vnode (for various reasons). + * + * MPSAFE */ int cache_inval_vp_nonblock(struct vnode *vp) @@ -1132,7 +1303,7 @@ cache_inval_vp_nonblock(struct vnode *vp) _cache_drop(ncp); if (next) _cache_drop(next); - break; + goto done; } if (ncp->nc_vp != vp) { kprintf("Warning: cache_inval_vp: race-A detected on " @@ -1140,20 +1311,22 @@ cache_inval_vp_nonblock(struct vnode *vp) _cache_put(ncp); if (next) _cache_drop(next); - break; + goto done; } _cache_inval(ncp, 0); _cache_put(ncp); /* also releases reference */ ncp = next; + spin_lock_wr(&vp->v_spinlock); if (ncp && ncp->nc_vp != vp) { + spin_unlock_wr(&vp->v_spinlock); kprintf("Warning: cache_inval_vp: race-B detected on " "%s\n", ncp->nc_name); _cache_drop(ncp); - break; + goto done; } - spin_lock_wr(&vp->v_spinlock); } spin_unlock_wr(&vp->v_spinlock); +done: return(TAILQ_FIRST(&vp->v_namecache) != NULL); } @@ -1165,30 +1338,50 @@ cache_inval_vp_nonblock(struct vnode *vp) * Because there may be references to the source ncp we cannot copy its * contents to the target. Instead the source ncp is relinked as the target * and the target ncp is removed from the namecache topology. 
+ * + * MPSAFE */ void cache_rename(struct nchandle *fnch, struct nchandle *tnch) { struct namecache *fncp = fnch->ncp; struct namecache *tncp = tnch->ncp; + struct namecache *tncp_par; + struct nchash_head *nchpp; + u_int32_t hash; char *oname; - lwkt_tokref nlock; - lwkt_gettoken(&nlock, &vfs_token); - _cache_setunresolved(tncp); + /* + * Rename fncp (unlink) + */ _cache_unlink_parent(fncp); - _cache_link_parent(fncp, tncp->nc_parent); - _cache_unlink_parent(tncp); oname = fncp->nc_name; fncp->nc_name = tncp->nc_name; fncp->nc_nlen = tncp->nc_nlen; + tncp_par = tncp->nc_parent; + _cache_hold(tncp_par); + _cache_lock(tncp_par); + + /* + * Rename fncp (relink) + */ + hash = fnv_32_buf(fncp->nc_name, fncp->nc_nlen, FNV1_32_INIT); + hash = fnv_32_buf(&tncp_par, sizeof(tncp_par), hash); + nchpp = NCHHASH(hash); + + spin_lock_wr(&nchpp->spin); + _cache_link_parent(fncp, tncp_par, nchpp); + spin_unlock_wr(&nchpp->spin); + + _cache_put(tncp_par); + + /* + * Get rid of the overwritten tncp (unlink) + */ + _cache_setunresolved(tncp); + _cache_unlink_parent(tncp); tncp->nc_name = NULL; tncp->nc_nlen = 0; - if (fncp->nc_head) - _cache_rehash(fncp); - if (tncp->nc_head) - _cache_rehash(tncp); - lwkt_reltoken(&nlock); if (oname) kfree(oname, M_VFSCACHE); @@ -1196,9 +1389,7 @@ cache_rename(struct nchandle *fnch, struct nchandle *tnch) /* * vget the vnode associated with the namecache entry. Resolve the namecache - * entry if necessary and deal with namecache/vp races. The passed ncp must - * be referenced and may be locked. The ncp's ref/locking state is not - * effected by this call. + * entry if necessary. The passed ncp must be referenced and locked. * * lk_type may be LK_SHARED, LK_EXCLUSIVE. A ref'd, possibly locked * (depending on the passed lk_type) will be returned in *vpp with an error @@ -1207,10 +1398,17 @@ cache_rename(struct nchandle *fnch, struct nchandle *tnch) * cache hit and there is no vnode to retrieve, but other errors can occur * too. * - * The main race we have to deal with are namecache zaps. The ncp itself - * will not disappear since it is referenced, and it turns out that the - * validity of the vp pointer can be checked simply by rechecking the - * contents of ncp->nc_vp. + * The vget() can race a reclaim. If this occurs we re-resolve the + * namecache entry. + * + * There are numerous places in the kernel where vget() is called on a + * vnode while one or more of its namecache entries is locked. Releasing + * a vnode never deadlocks against locked namecache entries (the vnode + * will not get recycled while referenced ncp's exist). This means we + * can safely acquire the vnode. In fact, we MUST NOT release the ncp + * lock when acquiring the vp lock or we might cause a deadlock. + * + * MPSAFE */ int cache_vget(struct nchandle *nch, struct ucred *cred, @@ -1221,38 +1419,36 @@ cache_vget(struct nchandle *nch, struct ucred *cred, int error; ncp = nch->ncp; + KKASSERT(ncp->nc_locktd == curthread); again: vp = NULL; - if (ncp->nc_flag & NCF_UNRESOLVED) { - _cache_lock(ncp); + if (ncp->nc_flag & NCF_UNRESOLVED) error = cache_resolve(nch, cred); - _cache_unlock(ncp); - } else { + else error = 0; - } + if (error == 0 && (vp = ncp->nc_vp) != NULL) { - /* - * Accessing the vnode from the namecache is a bit - * dangerous. Because there are no refs on the vnode, it - * could be in the middle of a reclaim. 
- */
-        if (vp->v_flag & VRECLAIMED) {
-            kprintf("Warning: vnode reclaim race detected in cache_vget on %p (%s)\n", vp, ncp->nc_name);
-            _cache_lock(ncp);
-            _cache_setunresolved(ncp);
-            _cache_unlock(ncp);
-            goto again;
-        }
        error = vget(vp, lk_type);
        if (error) {
-            if (vp != ncp->nc_vp)
+            /*
+             * VRECLAIM race
+             */
+            if (error == ENOENT) {
+                kprintf("Warning: vnode reclaim race detected "
+                        "in cache_vget on %p (%s)\n",
+                        vp, ncp->nc_name);
+                _cache_setunresolved(ncp);
                goto again;
+            }
+
+            /*
+             * Not a reclaim race, some other error.
+             */
+            KKASSERT(ncp->nc_vp == vp);
            vp = NULL;
-        } else if (vp != ncp->nc_vp) {
-            vput(vp);
-            goto again;
-        } else if (vp->v_flag & VRECLAIMED) {
-            panic("vget succeeded on a VRECLAIMED node! vp %p", vp);
+        } else {
+            KKASSERT(ncp->nc_vp == vp);
+            KKASSERT((vp->v_flag & VRECLAIMED) == 0);
        }
    }
    if (error == 0 && vp == NULL)
@@ -1269,35 +1465,36 @@ cache_vref(struct nchandle *nch, struct ucred *cred, struct vnode **vpp)
    int error;
    ncp = nch->ncp;
-
+    KKASSERT(ncp->nc_locktd == curthread);
again:
    vp = NULL;
-    if (ncp->nc_flag & NCF_UNRESOLVED) {
-        _cache_lock(ncp);
+    if (ncp->nc_flag & NCF_UNRESOLVED)
        error = cache_resolve(nch, cred);
-        _cache_unlock(ncp);
-    } else {
+    else
        error = 0;
-    }
+
    if (error == 0 && (vp = ncp->nc_vp) != NULL) {
-        /*
-         * Since we did not obtain any locks, a cache zap
-         * race can occur here if the vnode is in the middle
-         * of being reclaimed and has not yet been able to
-         * clean out its cache node. If that case occurs,
-         * we must lock and unresolve the cache, then loop
-         * to retry.
-         */
-        if ((error = vget(vp, LK_SHARED)) != 0) {
+        error = vget(vp, LK_SHARED);
+        if (error) {
+            /*
+             * VRECLAIM race
+             */
            if (error == ENOENT) {
-                kprintf("Warning: vnode reclaim race detected on cache_vref %p (%s)\n", vp, ncp->nc_name);
-                _cache_lock(ncp);
+                kprintf("Warning: vnode reclaim race detected "
+                        "in cache_vref on %p (%s)\n",
+                        vp, ncp->nc_name);
                _cache_setunresolved(ncp);
-                _cache_unlock(ncp);
                goto again;
            }
-            /* fatal error */
+
+            /*
+             * Not a reclaim race, some other error.
+             */
+            KKASSERT(ncp->nc_vp == vp);
+            vp = NULL;
        } else {
+            KKASSERT(ncp->nc_vp == vp);
+            KKASSERT((vp->v_flag & VRECLAIMED) == 0);
            /* caller does not want a lock */
            vn_unlock(vp);
        }
@@ -1320,6 +1517,9 @@ again:
 * We have to leave par unlocked when vget()ing dvp to avoid a deadlock,
 * so use vhold()/vdrop() while holding the lock to prevent dvp from
 * getting destroyed.
+ *
+ * MPSAFE - Note vhold() is allowed when dvp has 0 refs if we hold a
+ *          lock on the ncp in question.
 */
static struct vnode *
cache_dvpref(struct namecache *ncp)
@@ -1330,21 +1530,20 @@
    dvp = NULL;
    if ((par = ncp->nc_parent) != NULL) {
        _cache_hold(par);
-        if (_cache_lock_nonblock(par) == 0) {
-            if ((par->nc_flag & NCF_UNRESOLVED) == 0) {
-                if ((dvp = par->nc_vp) != NULL)
-                    vhold(dvp);
-            }
-            _cache_unlock(par);
-            if (dvp) {
-                if (vget(dvp, LK_SHARED) == 0) {
-                    vn_unlock(dvp);
-                    vdrop(dvp);
-                    /* return refd, unlocked dvp */
-                } else {
-                    vdrop(dvp);
-                    dvp = NULL;
-                }
+        _cache_lock(par);
+        if ((par->nc_flag & NCF_UNRESOLVED) == 0) {
+            if ((dvp = par->nc_vp) != NULL)
+                vhold(dvp);
+        }
+        _cache_unlock(par);
+        if (dvp) {
+            if (vget(dvp, LK_SHARED) == 0) {
+                vn_unlock(dvp);
+                vdrop(dvp);
+                /* return refd, unlocked dvp */
+            } else {
+                vdrop(dvp);
+                dvp = NULL;
            }
        }
        _cache_drop(par);
@@ -1637,7 +1836,10 @@ cache_inefficient_scan(struct nchandle *nch, struct ucred *cred,
    vat.va_blocksize = 0;
    if ((error = VOP_GETATTR(dvp, &vat)) != 0)
        return (error);
-    if ((error = cache_vref(nch, cred, &pvp)) != 0)
+    cache_lock(nch);
+    error = cache_vref(nch, cred, &pvp);
+    cache_unlock(nch);
+    if (error)
        return (error);
    if (ncvp_debug) {
        kprintf("inefficient_scan: directory iosize %ld "
@@ -1771,9 +1973,7 @@
static struct namecache *
cache_zap(struct namecache *ncp)
{
    struct namecache *par;
-    struct spinlock *hspin;
    struct vnode *dropvp;
-    lwkt_tokref nlock;
    int refs;
    /*
@@ -1796,11 +1996,10 @@ cache_zap(struct namecache *ncp)
    /*
     * Acquire locks
     */
-    lwkt_gettoken(&nlock, &vfs_token);
-    hspin = NULL;
-    if (ncp->nc_head) {
-        hspin = &ncp->nc_head->spin;
-        spin_lock_wr(hspin);
+    if ((par = ncp->nc_parent) != NULL) {
+        _cache_hold(par);
+        _cache_lock(par);
+        spin_lock_wr(&ncp->nc_head->spin);
    }
    /*
@@ -1814,12 +2013,14 @@
        if (refs == 1 && TAILQ_EMPTY(&ncp->nc_list))
            break;
        if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1)) {
-            if (hspin)
-                spin_unlock_wr(hspin);
-            lwkt_reltoken(&nlock);
+            if (par) {
+                spin_unlock_wr(&ncp->nc_head->spin);
+                _cache_put(par);
+            }
            _cache_unlock(ncp);
            return(NULL);
        }
+        cpu_pause();
    }
    /*
@@ -1830,27 +2031,27 @@
     * drop a ref on the parent's vp if the parent's list becomes
     * empty.
     */
-    if (ncp->nc_head) {
-        LIST_REMOVE(ncp, nc_hash);
-        ncp->nc_head = NULL;
-    }
    dropvp = NULL;
-    if ((par = ncp->nc_parent) != NULL) {
-        par = _cache_hold(par);
-        TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
-        ncp->nc_parent = NULL;
+    if (par) {
+        struct nchash_head *nchpp = ncp->nc_head;
+        KKASSERT(nchpp != NULL);
+        LIST_REMOVE(ncp, nc_hash);
+        TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
        if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
            dropvp = par->nc_vp;
+        ncp->nc_head = NULL;
+        ncp->nc_parent = NULL;
+        spin_unlock_wr(&nchpp->spin);
+        _cache_unlock(par);
+    } else {
+        KKASSERT(ncp->nc_head == NULL);
    }
    /*
     * ncp should not have picked up any refs. Physically
     * destroy the ncp.
     */
-    if (hspin)
-        spin_unlock_wr(hspin);
-    lwkt_reltoken(&nlock);
    KKASSERT(ncp->nc_refs == 1);
    atomic_add_int(&numunres, -1);
    /* _cache_unlock(ncp) not required */
@@ -1902,13 +2103,16 @@ _cache_hysteresis(void)
/*
 * NEW NAMECACHE LOOKUP API
 *
- * Lookup an entry in the cache. A locked, referenced, non-NULL
- * entry is *always* returned, even if the supplied component is illegal.
+ * Lookup an entry in the namecache. The passed par_nch must be referenced
+ * and unlocked. A referenced and locked nchandle with a non-NULL nch.ncp
+ * is ALWAYS returned, even if the supplied component is illegal.
+ *
 * The resulting namecache entry should be returned to the system with
- * cache_put() or _cache_unlock() + cache_drop().
+ * cache_put() or cache_unlock() + cache_drop().
 *
 * namecache locks are recursive but care must be taken to avoid lock order
- * reversals.
+ * reversals (hence why the passed par_nch must be unlocked). Locking
+ * rules are ordered for parent traversals, not for child traversals.
 *
 * Nobody else will be able to manipulate the associated namespace (e.g.
 * create, delete, rename, rename-target) until the caller unlocks the
@@ -1940,11 +2144,18 @@ cache_nlookup(struct nchandle *par_nch, struct nlcomponent *nlc)
    struct mount *mp;
    u_int32_t hash;
    globaldata_t gd;
-    lwkt_tokref nlock;
+    int par_locked;
    numcalls++;
    gd = mycpu;
    mp = par_nch->mount;
+    par_locked = 0;
+
+    /*
+     * This is a good time to call it, no ncp's are locked by
+     * the caller or us.
+     */
+    _cache_hysteresis();
    /*
     * Try to locate an existing entry
@@ -1970,7 +2181,11 @@ restart:
    ) {
        _cache_hold(ncp);
        spin_unlock_wr(&nchpp->spin);
+        if (par_locked) {
+            _cache_unlock(par_nch->ncp);
+            par_locked = 0;
+        }
        if (_cache_lock_special(ncp) == 0) {
            _cache_auto_unresolve(mp, ncp);
            if (new_ncp)
                _cache_free(new_ncp);
@@ -1982,39 +2197,44 @@
            goto restart;
        }
    }
-    spin_unlock_wr(&nchpp->spin);
    /*
     * We failed to locate an entry, create a new entry and add it to
-     * the cache. We have to relookup after possibly blocking in
-     * malloc.
+     * the cache. The parent ncp must also be locked so we
+     * can link into it.
+     *
+     * We have to relookup after possibly blocking in kmalloc or
+     * when locking par_nch.
+     *
+     * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special
+     *       mount case, in which case nc_name will be NULL.
     */
    if (new_ncp == NULL) {
+        spin_unlock_wr(&nchpp->spin);
        new_ncp = cache_alloc(nlc->nlc_namelen);
+        if (nlc->nlc_namelen) {
+            bcopy(nlc->nlc_nameptr, new_ncp->nc_name,
+                  nlc->nlc_namelen);
+            new_ncp->nc_name[nlc->nlc_namelen] = 0;
+        }
        goto restart;
    }
+    if (par_locked == 0) {
+        spin_unlock_wr(&nchpp->spin);
+        _cache_lock(par_nch->ncp);
+        par_locked = 1;
        goto restart;
    }
-
-    ncp = new_ncp;
    /*
-     * Initialize as a new UNRESOLVED entry, lock (non-blocking),
-     * and link to the parent. The mount point is usually inherited
-     * from the parent unless this is a special case such as a mount
-     * point where nlc_namelen is 0. If nlc_namelen is 0 nc_name will
-     * be NULL.
+     * WARNING! We still hold the spinlock. We have to set the hash
+     *          table entry atomically.
     */
-    if (nlc->nlc_namelen) {
-        bcopy(nlc->nlc_nameptr, ncp->nc_name, nlc->nlc_namelen);
-        ncp->nc_name[nlc->nlc_namelen] = 0;
-    }
-    nchpp = NCHHASH(hash);    /* compiler optimization */
-    spin_lock_wr(&nchpp->spin);
-    LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
-    ncp->nc_head = nchpp;
+    ncp = new_ncp;
+    _cache_link_parent(ncp, par_nch->ncp, nchpp);
    spin_unlock_wr(&nchpp->spin);
-    lwkt_gettoken(&nlock, &vfs_token);
-    _cache_link_parent(ncp, par_nch->ncp);
-    lwkt_reltoken(&nlock);
+    _cache_unlock(par_nch->ncp);
+    /* par_locked = 0 - not used */
found:
    /*
     * stats and namecache size management
     */
@@ -2025,7 +2245,6 @@ found:
        ++gd->gd_nchstats->ncs_goodhits;
    else
        ++gd->gd_nchstats->ncs_neghits;
-    _cache_hysteresis();
    nch.mount = mp;
    nch.ncp = ncp;
    atomic_add_int(&nch.mount->mnt_refs, 1);
@@ -2091,10 +2310,13 @@ cache_findmount(struct nchandle *nch)
 * Note that successful resolution does not necessarily return an error
 * code of 0. If the ncp resolves to a negative cache hit then ENOENT
 * will be returned.
+ * + * MPSAFE */ int cache_resolve(struct nchandle *nch, struct ucred *cred) { + struct namecache *par_tmp; struct namecache *par; struct namecache *ncp; struct nchandle nctmp; @@ -2162,13 +2384,20 @@ restart: */ if (ncp->nc_parent->nc_flag & NCF_DESTROYED) return(ENOENT); - par = ncp->nc_parent; - while (par->nc_parent && par->nc_parent->nc_vp == NULL) - par = par->nc_parent; + _cache_hold(par); + _cache_lock(par); + while ((par_tmp = par->nc_parent) != NULL && + par_tmp->nc_vp == NULL) { + _cache_hold(par_tmp); + _cache_lock(par_tmp); + _cache_put(par); + par = par_tmp; + } if (par->nc_parent == NULL) { kprintf("EXDEV case 2 %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name); + _cache_put(par); return (EXDEV); } kprintf("[diagnostic] cache_resolve: had to recurse on %*.*s\n", @@ -2180,7 +2409,8 @@ restart: * be one of its parents. We resolve it anyway, the loop * will handle any moves. */ - _cache_get(par); + _cache_get(par); /* additional hold/lock */ + _cache_put(par); /* from earlier hold/lock */ if (par == nch->mount->mnt_ncmountpt.ncp) { cache_resolve_mp(nch->mount); } else if ((dvp = cache_dvpref(par)) == NULL) { @@ -2324,7 +2554,7 @@ cache_cleanneg(int count) TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode); _cache_hold(ncp); spin_unlock_wr(&ncspin); - if (_cache_get_nonblock(ncp) == 0) { + if (_cache_lock_special(ncp) == 0) { ncp = cache_zap(ncp); if (ncp) _cache_drop(ncp); @@ -2336,35 +2566,6 @@ cache_cleanneg(int count) } /* - * Rehash a ncp. Rehashing is typically required if the name changes (should - * not generally occur) or the parent link changes. This function will - * unhash the ncp if the ncp is no longer hashable. - */ -static void -_cache_rehash(struct namecache *ncp) -{ - struct nchash_head *nchpp; - u_int32_t hash; - - if ((nchpp = ncp->nc_head) != NULL) { - spin_lock_wr(&nchpp->spin); - LIST_REMOVE(ncp, nc_hash); - ncp->nc_head = NULL; - spin_unlock_wr(&nchpp->spin); - } - if (ncp->nc_nlen && ncp->nc_parent) { - hash = fnv_32_buf(ncp->nc_name, ncp->nc_nlen, FNV1_32_INIT); - hash = fnv_32_buf(&ncp->nc_parent, - sizeof(ncp->nc_parent), hash); - nchpp = NCHHASH(hash); - spin_lock_wr(&nchpp->spin); - LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash); - ncp->nc_head = nchpp; - spin_unlock_wr(&nchpp->spin); - } -} - -/* * Name cache initialization, from vfsinit() when we are booting */ void @@ -2545,6 +2746,7 @@ kern_getcwd(char *buf, size_t buflen, int *error) int i, slash_prefixed; struct filedesc *fdp; struct nchandle nch; + struct namecache *ncp; numcwdcalls++; bp = buf; @@ -2554,7 +2756,11 @@ kern_getcwd(char *buf, size_t buflen, int *error) slash_prefixed = 0; nch = fdp->fd_ncdir; - while (nch.ncp && (nch.ncp != fdp->fd_nrdir.ncp || + ncp = nch.ncp; + if (ncp) + _cache_hold(ncp); + + while (ncp && (ncp != fdp->fd_nrdir.ncp || nch.mount != fdp->fd_nrdir.mount) ) { /* @@ -2562,26 +2768,32 @@ kern_getcwd(char *buf, size_t buflen, int *error) * of the current mount we have to skip to the mount point * in the underlying filesystem. 
*/ - if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { + if (ncp == nch.mount->mnt_ncmountpt.ncp) { nch = nch.mount->mnt_ncmounton; + _cache_drop(ncp); + ncp = nch.ncp; + if (ncp) + _cache_hold(ncp); continue; } /* * Prepend the path segment */ - for (i = nch.ncp->nc_nlen - 1; i >= 0; i--) { + for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; *error = ERANGE; - return(NULL); + bp = NULL; + goto done; } - *--bp = nch.ncp->nc_name[i]; + *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; *error = ERANGE; - return(NULL); + bp = NULL; + goto done; } *--bp = '/'; slash_prefixed = 1; @@ -2590,30 +2802,47 @@ kern_getcwd(char *buf, size_t buflen, int *error) * Go up a directory. This isn't a mount point so we don't * have to check again. */ - nch.ncp = nch.ncp->nc_parent; + while ((nch.ncp = ncp->nc_parent) != NULL) { + _cache_lock(ncp); + if (nch.ncp != ncp->nc_parent) { + _cache_unlock(ncp); + continue; + } + _cache_hold(nch.ncp); + _cache_unlock(ncp); + break; + } + _cache_drop(ncp); + ncp = nch.ncp; } - if (nch.ncp == NULL) { + if (ncp == NULL) { numcwdfail2++; *error = ENOENT; - return(NULL); + bp = NULL; + goto done; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; *error = ERANGE; - return(NULL); + bp = NULL; + goto done; } *--bp = '/'; } numcwdfound++; *error = 0; +done: + if (ncp) + _cache_drop(ncp); return (bp); } /* * Thus begins the fullpath magic. + * + * The passed nchp is referenced but not locked. */ - #undef STATNODE #define STATNODE(name) \ static u_int name; \ @@ -2631,12 +2860,12 @@ STATNODE(numfullpathfail4); STATNODE(numfullpathfound); int -cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **freebuf) +cache_fullpath(struct proc *p, struct nchandle *nchp, + char **retbuf, char **freebuf) { struct nchandle fd_nrdir; struct nchandle nch; struct namecache *ncp; - lwkt_tokref nlock; struct mount *mp; char *bp, *buf; int slash_prefixed; @@ -2644,7 +2873,6 @@ cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **free int i; atomic_add_int(&numfullpathcalls, -1); - lwkt_gettoken(&nlock, &vfs_token); *retbuf = NULL; *freebuf = NULL; @@ -2657,8 +2885,10 @@ cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **free else fd_nrdir = rootnch; slash_prefixed = 0; - cache_copy(nchp, &nch); + nch = *nchp; ncp = nch.ncp; + if (ncp) + _cache_hold(ncp); mp = nch.mount; while (ncp && (ncp != fd_nrdir.ncp || mp != fd_nrdir.mount)) { @@ -2667,9 +2897,11 @@ cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **free * of the current mount we have to skip to the mount point. */ if (ncp == mp->mnt_ncmountpt.ncp) { - cache_drop(&nch); - cache_copy(&mp->mnt_ncmounton, &nch); + nch = mp->mnt_ncmounton; + _cache_drop(ncp); ncp = nch.ncp; + if (ncp) + _cache_hold(ncp); mp = nch.mount; continue; } @@ -2677,14 +2909,14 @@ cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **free /* * Prepend the path segment */ - for (i = nch.ncp->nc_nlen - 1; i >= 0; i--) { + for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numfullpathfail4++; kfree(buf, M_TEMP); error = ENOMEM; goto done; } - *--bp = nch.ncp->nc_name[i]; + *--bp = ncp->nc_name[i]; } if (bp == buf) { numfullpathfail4++; @@ -2699,14 +2931,22 @@ cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **free * Go up a directory. This isn't a mount point so we don't * have to check again. * - * We need the ncp's spinlock to safely access nc_parent. 
+ * We can only safely access nc_parent with ncp held locked.
         */
-        if ((nch.ncp = ncp->nc_parent) != NULL)
+        while ((nch.ncp = ncp->nc_parent) != NULL) {
+            _cache_lock(ncp);
+            if (nch.ncp != ncp->nc_parent) {
+                _cache_unlock(ncp);
+                continue;
+            }
            _cache_hold(nch.ncp);
+            _cache_unlock(ncp);
+            break;
+        }
        _cache_drop(ncp);
        ncp = nch.ncp;
    }
-    if (nch.ncp == NULL) {
+    if (ncp == NULL) {
        numfullpathfail2++;
        kfree(buf, M_TEMP);
        error = ENOENT;
@@ -2727,8 +2967,8 @@
    *freebuf = buf;
    error = 0;
done:
-    cache_drop(&nch);
-    lwkt_reltoken(&nlock);
+    if (ncp)
+        _cache_drop(ncp);
    return(error);
}
diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c
index f23376d844..5c9076d14e 100644
--- a/sys/kern/vfs_conf.c
+++ b/sys/kern/vfs_conf.c
@@ -271,7 +271,7 @@ vfs_mountroot_devfs(void)
    }
    vfsp = vfsconf_find_by_name("devfs");
-    vp->v_flag |= VMOUNT;
+    vsetflags(vp, VMOUNT);
    /*
     * Allocate and initialize the filesystem.
@@ -318,7 +318,7 @@ vfs_mountroot_devfs(void)
    nch.ncp->nc_flag |= NCF_ISMOUNTPT;
    /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
-    vp->v_flag &= ~VMOUNT;
+    vclrflags(vp, VMOUNT);
    mountlist_insert(mp, MNTINS_LAST);
    vn_unlock(vp);
    //checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
@@ -335,7 +335,7 @@ vfs_mountroot_devfs(void)
    vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
    vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
    vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
-    vp->v_flag &= ~VMOUNT;
+    vclrflags(vp, VMOUNT);
    mp->mnt_vfc->vfc_refcount--;
    vfs_unbusy(mp);
    kfree(mp, M_MOUNT);
diff --git a/sys/kern/vfs_lock.c b/sys/kern/vfs_lock.c
index 935481f495..0c8852ac86 100644
--- a/sys/kern/vfs_lock.c
+++ b/sys/kern/vfs_lock.c
@@ -85,6 +85,7 @@ static struct sysref_class vnode_sysref_class = {
 */
static TAILQ_HEAD(freelst, vnode) vnode_free_list;
static struct vnode vnode_free_mid;
+static struct spinlock vfs_spin = SPINLOCK_INITIALIZER(vfs_spin);
int freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD,
@@ -106,14 +107,45 @@ vfs_lock_init(void)
{
    TAILQ_INIT(&vnode_free_list);
    TAILQ_INSERT_HEAD(&vnode_free_list, &vnode_free_mid, v_freelist);
+    spin_init(&vfs_spin);
}
/*
- * Inline helper functions. vbusy() and vfree() must be called while in a
- * critical section.
+ * Misc functions
+ */
+static __inline
+void
+_vsetflags(struct vnode *vp, int flags)
+{
+    atomic_set_int(&vp->v_flag, flags);
+}
+
+static __inline
+void
+_vclrflags(struct vnode *vp, int flags)
+{
+    atomic_clear_int(&vp->v_flag, flags);
+}
+
+void
+vsetflags(struct vnode *vp, int flags)
+{
+    _vsetflags(vp, flags);
+}
+
+void
+vclrflags(struct vnode *vp, int flags)
+{
+    _vclrflags(vp, flags);
+}
+
+/*
+ * Inline helper functions. vbusy() and vfree() must be called while
+ * vp->v_spinlock is held.
+ *
+ * WARNING! These functions are typically called with v_spinlock held.
+ *
- * Warning: must be callable if the caller holds a read spinlock to something
- * else, meaning we can't use read spinlocks here.
+ * MPSAFE
 */
static __inline
void
@@ -123,11 +155,18 @@ __vbusy(struct vnode *vp)
    if ((ulong)vp == trackvnode)
        kprintf("__vbusy %p %08x\n", vp, vp->v_flag);
#endif
+    spin_lock_wr(&vfs_spin);
    TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
    freevnodes--;
-    vp->v_flag &= ~VFREE;
+    _vclrflags(vp, VFREE);
+    spin_unlock_wr(&vfs_spin);
}
+/*
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
+ */
static __inline
void
__vfree(struct vnode *vp)
@@ -138,6 +177,7 @@
        print_backtrace();
    }
#endif
+    spin_lock_wr(&vfs_spin);
    if (vp->v_flag & VRECLAIMED)
        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
    else if (vp->v_flag & (VAGE0 | VAGE1))
@@ -145,9 +185,15 @@
    else
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
    freevnodes++;
-    vp->v_flag |= VFREE;
+    _vsetflags(vp, VFREE);
+    spin_unlock_wr(&vfs_spin);
}
+/*
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
+ */
static __inline
void
__vfreetail(struct vnode *vp)
@@ -156,9 +202,11 @@
    if ((ulong)vp == trackvnode)
        kprintf("__vfreetail %p %08x\n", vp, vp->v_flag);
#endif
+    spin_lock_wr(&vfs_spin);
    TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
    freevnodes++;
-    vp->v_flag |= VFREE;
+    _vsetflags(vp, VFREE);
+    spin_unlock_wr(&vfs_spin);
}
/*
@@ -167,6 +215,10 @@
 *
 * This routine is only valid if the vnode is already either VFREE or
 * VCACHED, or if it can become VFREE or VCACHED via vnode_terminate().
+ *
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
 */
static __inline boolean_t
vshouldfree(struct vnode *vp)
@@ -178,6 +230,8 @@
/*
 * Add a ref to an active vnode. This function should never be called
 * with an inactive vnode (use vget() instead).
+ *
+ * MPSAFE
 */
void
vref(struct vnode *vp)
@@ -224,16 +278,20 @@
 *
 * vdrop needs to check for a VCACHE->VFREE transition to catch cases
 * where a vnode is held past its reclamation.
+ *
+ * MPSAFE
 */
void
vdrop(struct vnode *vp)
{
    KKASSERT(vp->v_sysref.refcnt != 0 && vp->v_auxrefs > 0);
+    spin_lock_wr(&vp->v_spinlock);
    atomic_subtract_int(&vp->v_auxrefs, 1);
    if ((vp->v_flag & VCACHED) && vshouldfree(vp)) {
-        vp->v_flag &= ~VCACHED;
+        _vclrflags(vp, VCACHED);
        __vfree(vp);
    }
+    spin_unlock_wr(&vp->v_spinlock);
}
/*
@@ -247,6 +305,8 @@
 * v_auxrefs, we must interlock auxiliary references against termination
 * via the VX lock mechanism. It is possible for a vnode to be reactivated
 * while we were blocked on the lock.
+ *
+ * MPSAFE
 */
void
vnode_terminate(struct vnode *vp)
@@ -274,15 +334,17 @@
     * or dirty pages in its cached VM object still present.
     */
    if ((vp->v_flag & VINACTIVE) == 0) {
-        vp->v_flag |= VINACTIVE;
+        _vsetflags(vp, VINACTIVE);
        if (vp->v_mount)
            VOP_INACTIVE(vp);
    }
+    spin_lock_wr(&vp->v_spinlock);
    KKASSERT((vp->v_flag & (VFREE|VCACHED)) == 0);
    if (vshouldfree(vp))
        __vfree(vp);
    else
-        vp->v_flag |= VCACHED;    /* inactive but not yet free */
+        _vsetflags(vp, VCACHED);    /* inactive but not yet free */
+    spin_unlock_wr(&vp->v_spinlock);
    vx_unlock(vp);
} else {
    /*
@@ -299,6 +361,8 @@
 * Physical vnode constructor / destructor. These are only executed on
 * the backend of the objcache. They are NOT executed on every vnode
 * allocation or deallocation.
+ *
+ * MPSAFE
 */
boolean_t
vnode_ctor(void *obj, void *private, int ocflags)
@@ -315,6 +379,9 @@
    return(TRUE);
}
+/*
+ * MPSAFE
+ */
void
vnode_dtor(void *obj, void *private)
{
@@ -330,8 +397,9 @@
 * These functions lock vnodes for reclamation and deactivation related
 * activities. The caller must already be holding some sort of reference
 * on the vnode.
+ * + * MPSAFE */ - void vx_lock(struct vnode *vp) { @@ -361,6 +429,9 @@ vx_unlock(struct vnode *vp) * These functions are MANDATORY for any code chain accessing a vnode * whos activation state is not known. * + * vget() can be called with LK_NOWAIT and will return EBUSY if the + * lock cannot be immediately acquired. + * * vget()/vput() are used when reactivation is desired. * * vx_get() and vx_put() are used when reactivation is not desired. @@ -383,7 +454,6 @@ vget(struct vnode *vp, int flags) * transitions and refs during termination are allowed here so * call sysref directly. */ - sysref_get(&vp->v_sysref); if ((error = vn_lock(vp, flags)) != 0) { /* @@ -408,23 +478,34 @@ vget(struct vnode *vp, int flags) * sysref that was earmarking those cases and preventing * the vnode from being destroyed. Our sysref is still held. */ + spin_lock_wr(&vp->v_spinlock); if (vp->v_flag & VFREE) { __vbusy(vp); + spin_unlock_wr(&vp->v_spinlock); sysref_put(&vp->v_sysref); sysref_activate(&vp->v_sysref); } else if (vp->v_flag & VCACHED) { - vp->v_flag &= ~VCACHED; + _vclrflags(vp, VCACHED); + spin_unlock_wr(&vp->v_spinlock); sysref_put(&vp->v_sysref); sysref_activate(&vp->v_sysref); } else { - KKASSERT(sysref_isactive(&vp->v_sysref)); + spin_unlock_wr(&vp->v_spinlock); + if (sysref_isinactive(&vp->v_sysref)) { + sysref_activate(&vp->v_sysref); + kprintf("Warning vp %p reactivation race\n", + vp); + } } - vp->v_flag &= ~VINACTIVE; + _vclrflags(vp, VINACTIVE); error = 0; } return(error); } +/* + * MPSAFE + */ void vput(struct vnode *vp) { @@ -434,6 +515,8 @@ vput(struct vnode *vp) /* * XXX The vx_*() locks should use auxrefs, not the main reference counter. + * + * MPSAFE */ void vx_get(struct vnode *vp) @@ -442,6 +525,9 @@ vx_get(struct vnode *vp) lockmgr(&vp->v_lock, LK_EXCLUSIVE); } +/* + * MPSAFE + */ int vx_get_nonblock(struct vnode *vp) { @@ -459,41 +545,27 @@ vx_get_nonblock(struct vnode *vp) * * vx_put needs to check for a VCACHE->VFREE transition to catch the * case where e.g. vnlru issues a vgone*(). + * + * MPSAFE */ void vx_put(struct vnode *vp) { + spin_lock_wr(&vp->v_spinlock); if ((vp->v_flag & VCACHED) && vshouldfree(vp)) { - vp->v_flag &= ~VCACHED; + _vclrflags(vp, VCACHED); __vfree(vp); } + spin_unlock_wr(&vp->v_spinlock); lockmgr(&vp->v_lock, LK_RELEASE); sysref_put(&vp->v_sysref); } /* - * Misc functions - */ - -void -vsetflags(struct vnode *vp, int flags) -{ - crit_enter(); - vp->v_flag |= flags; - crit_exit(); -} - -void -vclrflags(struct vnode *vp, int flags) -{ - crit_enter(); - vp->v_flag &= ~flags; - crit_exit(); -} - -/* * Try to reuse a vnode from the free list. NOTE: The returned vnode * is not completely initialized. + * + * MPSAFE */ static struct vnode * @@ -513,6 +585,7 @@ allocfreevnode(void) * * XXX NOT MP SAFE */ + spin_lock_wr(&vfs_spin); vp = TAILQ_FIRST(&vnode_free_list); if (vp == &vnode_free_mid) vp = TAILQ_NEXT(vp, v_freelist); @@ -521,8 +594,10 @@ allocfreevnode(void) TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + spin_unlock_wr(&vfs_spin); continue; } + spin_unlock_wr(&vfs_spin); #ifdef TRACKVNODE if ((ulong)vp == trackvnode) kprintf("allocfreevnode %p %08x\n", vp, vp->v_flag); @@ -595,6 +670,8 @@ allocfreevnode(void) * All new vnodes set the VAGE flags. An open() of the vnode will * decrement the (2-bit) flags. Vnodes which are opened several times * are thus retained in the cache over vnodes which are merely stat()d. 
+ * + * MPSAFE */ struct vnode * allocvnode(int lktimeout, int lkflags) @@ -689,6 +766,9 @@ allocvnode(int lktimeout, int lkflags) return (vp); } +/* + * MPSAFE + */ int freesomevnodes(int n) { diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 63e254cd3b..310a263397 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -938,6 +938,8 @@ SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp) /* * Move a vnode from one mount queue to another. + * + * MPSAFE */ void insmntque(struct vnode *vp, struct mount *mp) diff --git a/sys/kern/vfs_nlookup.c b/sys/kern/vfs_nlookup.c index 142169fc7a..385b5e1488 100644 --- a/sys/kern/vfs_nlookup.c +++ b/sys/kern/vfs_nlookup.c @@ -257,7 +257,7 @@ nlookup_done(struct nlookupdata *nd) nd->nl_flags &= ~NLC_NCPISLOCKED; cache_unlock(&nd->nl_nch); } - cache_drop(&nd->nl_nch); + cache_drop(&nd->nl_nch); /* NULL's out the nch */ } if (nd->nl_rootnch.ncp) cache_drop(&nd->nl_rootnch); @@ -301,7 +301,7 @@ nlookup_done_at(struct nlookupdata *nd, struct file *fp) void nlookup_zero(struct nlookupdata *nd) { - bzero(nd, sizeof(struct nlookupdata)); + bzero(nd, sizeof(struct nlookupdata)); } /* @@ -380,17 +380,12 @@ nlookup(struct nlookupdata *nd) bzero(&nlc, sizeof(nlc)); /* - * Setup for the loop. The current working namecache element must - * be in a refd + unlocked state. This typically the case on entry except - * when stringing nlookup()'s along in a chain, since nlookup() always - * returns nl_nch in a locked state. + * Setup for the loop. The current working namecache element is + * always at least referenced. We lock it as required, but always + * return a locked, resolved namecache entry. */ nd->nl_loopcnt = 0; - if (nd->nl_flags & NLC_NCPISLOCKED) { - nd->nl_flags &= ~NLC_NCPISLOCKED; - cache_unlock(&nd->nl_nch); - } - if (nd->nl_dvp ) { + if (nd->nl_dvp) { vrele(nd->nl_dvp); nd->nl_dvp = NULL; } @@ -402,6 +397,15 @@ nlookup(struct nlookupdata *nd) */ for (;;) { /* + * Make sure nl_nch is locked so we can access the vnode, resolution + * state, etc. + */ + if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) { + nd->nl_flags |= NLC_NCPISLOCKED; + cache_lock(&nd->nl_nch); + } + + /* * Check if the root directory should replace the current * directory. This is done at the start of a translation * or after a symbolic link has been found. In other cases @@ -411,9 +415,9 @@ nlookup(struct nlookupdata *nd) do { ++ptr; } while (*ptr == '/'); - cache_copy(&nd->nl_rootnch, &nch); - cache_drop(&nd->nl_nch); - nd->nl_nch = nch; + cache_get(&nd->nl_rootnch, &nch); + cache_put(&nd->nl_nch); + nd->nl_nch = nch; /* remains locked */ /* * Fast-track termination. There is no parent directory of @@ -422,13 +426,10 @@ nlookup(struct nlookupdata *nd) * e.g. 'rmdir /' is not allowed. */ if (*ptr == 0) { - if (nd->nl_flags & NLC_REFDVP) { + if (nd->nl_flags & NLC_REFDVP) error = EPERM; - } else { - cache_lock(&nd->nl_nch); - nd->nl_flags |= NLC_NCPISLOCKED; + else error = 0; - } break; } continue; @@ -498,12 +499,17 @@ nlookup(struct nlookupdata *nd) nctmp = nctmp.mount->mnt_ncmounton; nctmp.ncp = nctmp.ncp->nc_parent; KKASSERT(nctmp.ncp != NULL); - cache_copy(&nctmp, &nch); /* XXX hack */ - cache_get(&nch, &nch); + cache_hold(&nctmp); + cache_get(&nctmp, &nch); cache_drop(&nctmp); /* NOTE: zero's nctmp */ } wasdotordotdot = 2; } else { + /* + * Must unlock nl_nch when traversing down the path. 
+ */ + cache_unlock(&nd->nl_nch); + nd->nl_flags &= ~NLC_NCPISLOCKED; nch = cache_nlookup(&nd->nl_nch, &nlc); while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) { kprintf("[diagnostic] nlookup: relookup %*.*s\n", @@ -526,14 +532,24 @@ nlookup(struct nlookupdata *nd) if ((par.ncp = nch.ncp->nc_parent) != NULL) { par.mount = nch.mount; cache_hold(&par); - dflags = 0; + cache_lock(&par); error = naccess(&par, 0, nd->nl_cred, &dflags); - cache_drop(&par); + cache_put(&par); } } + if (nd->nl_flags & NLC_NCPISLOCKED) { + cache_unlock(&nd->nl_nch); + nd->nl_flags &= ~NLC_NCPISLOCKED; + } /* - * [end of subsection] ncp is locked and ref'd. nd->nl_nch is ref'd + * [end of subsection] + * + * nch is locked and referenced. + * nd->nl_nch is unlocked and referenced. + * + * nl_nch must be unlocked or we could chain lock to the root + * if a resolve gets stuck (e.g. in NFS). */ /* @@ -693,6 +709,7 @@ nlookup(struct nlookupdata *nd) if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { cache_drop(&nd->nl_nch); cache_unlock(&nch); + KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); nd->nl_nch = nch; continue; } @@ -733,7 +750,9 @@ nlookup(struct nlookupdata *nd) * If NLC_REFDVP is set acquire a referenced parent dvp. */ if (nd->nl_flags & NLC_REFDVP) { + cache_lock(&nd->nl_nch); error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp); + cache_unlock(&nd->nl_nch); if (error) { kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp); cache_put(&nch); @@ -858,9 +877,7 @@ fail: * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, * the latter is only tested if the target exists. * - * The passed ncp may or may not be locked. The caller should use a - * locked ncp on leaf lookups, especially for NLC_CREATE, NLC_RENAME_DST, - * NLC_DELETE, and NLC_EXCL checks. + * The passed ncp must be referenced and locked. */ int naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) @@ -870,16 +887,17 @@ naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) int error; int sticky; + ASSERT_NCH_LOCKED(nch); if (nch->ncp->nc_flag & NCF_UNRESOLVED) { - cache_lock(nch); cache_resolve(nch, cred); - cache_unlock(nch); } error = nch->ncp->nc_error; /* * Directory permissions checks. Silently ignore ENOENT if these * tests pass. It isn't an error. 
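Taken together, the rules above mean nlookup() now hands nl_nch back locked as well as referenced and resolved. A usage sketch of a typical consumer (path is a placeholder variable, error handling trimmed; the open()/stat() syscall paths below follow this shape via nlookup_init()/nlookup_done()):

        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);   /* nl_nch returned locked+resolved */
        if (error == 0) {
                /* nc_vp may only be dereferenced while the ncp is locked */
                kprintf("resolved ncp %p vp %p\n",
                        nd.nl_nch.ncp, nd.nl_nch.ncp->nc_vp);
        }
        nlookup_done(&nd);              /* unlocks and drops nl_nch */

Note how cache_put() in the hunks above serves as the combined unlock-plus-drop for a locked, referenced nchandle, the inverse of the cache_hold()/cache_lock() pairs.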
+ * + * We have to lock nch.ncp to safely resolve nch.ncp->nc_parent */ if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { if (((nflags & NLC_CREATE) && nch->ncp->nc_vp == NULL) || @@ -887,21 +905,19 @@ naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) ((nflags & NLC_RENAME_SRC) && nch->ncp->nc_vp != NULL) || (nflags & NLC_RENAME_DST) ) { - lwkt_tokref nlock; struct nchandle par; - lwkt_gettoken(&nlock, &vfs_token); if ((par.ncp = nch->ncp->nc_parent) == NULL) { if (error != EAGAIN) error = EINVAL; } else if (error == 0 || error == ENOENT) { par.mount = nch->mount; - cache_hold(&par); sticky = 0; + cache_hold(&par); + cache_lock(&par); error = naccess(&par, NLC_WRITE, cred, NULL); - cache_drop(&par); + cache_put(&par); } - lwkt_reltoken(&nlock); } } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 697137f159..369867982a 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -86,6 +86,7 @@ #include #include #include +#include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); @@ -870,7 +871,7 @@ brelvp(struct buf *bp) bp->b_flags &= ~B_HASHED; } if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) { - vp->v_flag &= ~VONWORKLST; + vclrflags(vp, VONWORKLST); LIST_REMOVE(vp, v_synclist); } bp->b_vp = NULL; @@ -954,7 +955,7 @@ reassignbuf(struct buf *bp) } if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) { - vp->v_flag &= ~VONWORKLST; + vclrflags(vp, VONWORKLST); LIST_REMOVE(vp, v_synclist); } } @@ -1080,7 +1081,7 @@ vclean_vxlocked(struct vnode *vp, int flags) */ if (vp->v_flag & VRECLAIMED) return; - vp->v_flag |= VRECLAIMED; + vsetflags(vp, VRECLAIMED); /* * Scrap the vfs cache @@ -1132,10 +1133,13 @@ vclean_vxlocked(struct vnode *vp, int flags) * * This can occur if a file with a link count of 0 needs to be * truncated. + * + * If the vnode is already dead don't try to deactivate it. */ if ((vp->v_flag & VINACTIVE) == 0) { - vp->v_flag |= VINACTIVE; - VOP_INACTIVE(vp); + vsetflags(vp, VINACTIVE); + if (vp->v_mount) + VOP_INACTIVE(vp); vinvalbuf(vp, V_SAVE, 0, 0); } @@ -1149,14 +1153,14 @@ vclean_vxlocked(struct vnode *vp, int flags) } else { vm_pager_deallocate(object); } - vp->v_flag &= ~VOBJBUF; + vclrflags(vp, VOBJBUF); } KKASSERT((vp->v_flag & VOBJBUF) == 0); /* - * Reclaim the vnode. + * Reclaim the vnode if not already dead. */ - if (VOP_RECLAIM(vp)) + if (vp->v_mount && VOP_RECLAIM(vp)) panic("vclean: cannot reclaim"); /* @@ -1173,7 +1177,7 @@ vclean_vxlocked(struct vnode *vp, int flags) * as inactive or reclaimed. */ if (active && (flags & DOCLOSE)) { - vp->v_flag &= ~(VINACTIVE|VRECLAIMED); + vclrflags(vp, VINACTIVE | VRECLAIMED); } } @@ -1285,7 +1289,6 @@ vmaxiosize(struct vnode *vp) * Instead, it happens automatically when the caller releases the VX lock * (assuming there aren't any other references). */ - void vgone_vxlocked(struct vnode *vp) { @@ -1295,6 +1298,8 @@ vgone_vxlocked(struct vnode *vp) */ KKASSERT(vp->v_lock.lk_exclusivecount == 1); + get_mplock(); + /* * Clean out the filesystem specific data and set the VRECLAIMED * bit. Also deactivate the vnode if necessary. 
@@ -1321,6 +1326,7 @@ vgone_vxlocked(struct vnode *vp) * Set us to VBAD */ vp->v_type = VBAD; + rel_mplock(); } /* @@ -1408,7 +1414,7 @@ retry: } } KASSERT(vp->v_object != NULL, ("vinitvmio: NULL object")); - vp->v_flag |= VOBJBUF; + vsetflags(vp, VOBJBUF); return (error); } diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c index 9cbeb71b3a..0567e0d713 100644 --- a/sys/kern/vfs_sync.c +++ b/sys/kern/vfs_sync.c @@ -169,7 +169,7 @@ vn_syncer_add_to_worklist(struct vnode *vp, int delay) slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); - vp->v_flag |= VONWORKLST; + vsetflags(vp, VONWORKLST); lwkt_reltoken(&ilock); } @@ -457,7 +457,7 @@ sync_reclaim(struct vop_reclaim_args *ap) KKASSERT(vp->v_mount->mnt_syncer != vp); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); - vp->v_flag &= ~VONWORKLST; + vclrflags(vp, VONWORKLST); } lwkt_reltoken(&ilock); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 26a5979f11..56dc082896 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -260,7 +260,7 @@ sys_mount(struct mount_args *uap) error = EBUSY; goto done; } - vp->v_flag |= VMOUNT; + vsetflags(vp, VMOUNT); mp->mnt_flag |= uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); vn_unlock(vp); @@ -329,7 +329,7 @@ sys_mount(struct mount_args *uap) error = EBUSY; goto done; } - vp->v_flag |= VMOUNT; + vsetflags(vp, VMOUNT); /* * Allocate and initialize the filesystem. @@ -377,7 +377,7 @@ update: mp->mnt_kern_flag = flag2; } vfs_unbusy(mp); - vp->v_flag &= ~VMOUNT; + vclrflags(vp, VMOUNT); vrele(vp); cache_drop(&nch); goto done; @@ -406,7 +406,7 @@ update: nch.ncp->nc_flag |= NCF_ISMOUNTPT; /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ - vp->v_flag &= ~VMOUNT; + vclrflags(vp, VMOUNT); mountlist_insert(mp, MNTINS_LAST); vn_unlock(vp); checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); @@ -420,7 +420,7 @@ update: vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); - vp->v_flag &= ~VMOUNT; + vclrflags(vp, VMOUNT); mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp); kfree(mp, M_MOUNT); @@ -1916,17 +1916,18 @@ kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) int sys_open(struct open_args *uap) { + CACHE_MPLOCK_DECLARE; struct nlookupdata nd; int error; - get_mplock(); + CACHE_GETMPLOCK1(); error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); if (error == 0) { error = kern_open(&nd, uap->flags, uap->mode, &uap->sysmsg_result); } nlookup_done(&nd); - rel_mplock(); + CACHE_RELMPLOCK(); return (error); } @@ -1938,18 +1939,19 @@ sys_open(struct open_args *uap) int sys_openat(struct openat_args *uap) { + CACHE_MPLOCK_DECLARE; struct nlookupdata nd; int error; struct file *fp; - get_mplock(); + CACHE_GETMPLOCK1(); error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); if (error == 0) { error = kern_open(&nd, uap->flags, uap->mode, &uap->sysmsg_result); } nlookup_done_at(&nd, fp); - rel_mplock(); + CACHE_RELMPLOCK(); return (error); } @@ -2581,6 +2583,9 @@ sys_faccessat(struct faccessat_args *uap) } +/* + * MPSAFE + */ int kern_stat(struct nlookupdata *nd, struct stat *st) { @@ -2620,16 +2625,17 @@ again: * * Get file status; this version follows links. 
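The CACHE_MPLOCK_DECLARE / CACHE_GETMPLOCK1() / CACHE_RELMPLOCK() pattern above keys off the new cache_mpsafe knob (the macros appear in the sys/namecache.h hunk further down). The sysctl itself is defined in vfs_cache.c, outside this excerpt, presumably along these lines:

        int cache_mpsafe = 0;           /* 0: keep taking the MP lock */
        SYSCTL_INT(_vfs, OID_AUTO, cache_mpsafe, CTLFLAG_RW,
                   &cache_mpsafe, 0, "Run namecache path lookups MPSAFE");

With vfs.cache_mpsafe set to 1, the open()/openat() wrappers above, and the stat() family below, skip get_mplock() entirely around their lookups.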
* - * MPALMOSTSAFE + * MPSAFE */ int sys_stat(struct stat_args *uap) { + CACHE_MPLOCK_DECLARE; struct nlookupdata nd; struct stat st; int error; - get_mplock(); + CACHE_GETMPLOCK1(); error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); if (error == 0) { error = kern_stat(&nd, &st); @@ -2637,7 +2643,7 @@ sys_stat(struct stat_args *uap) error = copyout(&st, uap->ub, sizeof(*uap->ub)); } nlookup_done(&nd); - rel_mplock(); + CACHE_RELMPLOCK(); return (error); } @@ -2651,11 +2657,12 @@ sys_stat(struct stat_args *uap) int sys_lstat(struct lstat_args *uap) { + CACHE_MPLOCK_DECLARE; struct nlookupdata nd; struct stat st; int error; - get_mplock(); + CACHE_GETMPLOCK1(); error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); if (error == 0) { error = kern_stat(&nd, &st); @@ -2663,7 +2670,7 @@ sys_lstat(struct lstat_args *uap) error = copyout(&st, uap->ub, sizeof(*uap->ub)); } nlookup_done(&nd); - rel_mplock(); + CACHE_RELMPLOCK(); return (error); } @@ -2677,6 +2684,7 @@ sys_lstat(struct lstat_args *uap) int sys_fstatat(struct fstatat_args *uap) { + CACHE_MPLOCK_DECLARE; struct nlookupdata nd; struct stat st; int error; @@ -2688,7 +2696,7 @@ sys_fstatat(struct fstatat_args *uap) flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; - get_mplock(); + CACHE_GETMPLOCK1(); error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, flags); if (error == 0) { @@ -2697,7 +2705,7 @@ sys_fstatat(struct fstatat_args *uap) error = copyout(&st, uap->sb, sizeof(*uap->sb)); } nlookup_done_at(&nd, fp); - rel_mplock(); + CACHE_RELMPLOCK(); return (error); } @@ -3598,22 +3606,16 @@ kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) } /* - * relock the source ncp. NOTE AFTER RELOCKING: the source ncp - * may have become invalid while it was unlocked, nc_vp and nc_mount - * could be NULL. + * Relock the source ncp. cache_relock() will deal with any + * deadlocks against the already-locked tond and will also + * make sure both are resolved. + * + * NOTE AFTER RELOCKING: The source or target ncp may have become + * invalid while they were unlocked, nc_vp and nc_mount could + * be NULL. */ - if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { - cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); - } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { - cache_lock(&fromnd->nl_nch); - cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); - } else { - cache_unlock(&tond->nl_nch); - cache_lock(&fromnd->nl_nch); - cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); - cache_lock(&tond->nl_nch); - cache_resolve(&tond->nl_nch, tond->nl_cred); - } + cache_relock(&fromnd->nl_nch, fromnd->nl_cred, + &tond->nl_nch, tond->nl_cred); fromnd->nl_flags |= NLC_NCPISLOCKED; /* @@ -3672,6 +3674,8 @@ kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) * You cannot rename a source into itself or a subdirectory of itself. * We check this by travsersing the target directory upwards looking * for a match against the source. 
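cache_relock() is new; its body lives in vfs_cache.c and is not part of this diff. A plausible sketch of its contract, modeled on the deleted code (hypothetical body; nch2 arrives locked and nch1 unlocked; the deleted code also skipped the back-off when pointer ordering already ruled out deadlock, a refinement omitted here):

        void
        cache_relock(struct nchandle *nch1, struct ucred *cred1,
                     struct nchandle *nch2, struct ucred *cred2)
        {
                if (cache_lock_nonblock(nch1) == 0) {
                        /* got nch1 cheaply; just re-resolve it */
                        cache_resolve(nch1, cred1);
                } else {
                        /* back nch2 off to avoid deadlock, retake both */
                        cache_unlock(nch2);
                        cache_lock(nch1);
                        cache_resolve(nch1, cred1);
                        cache_lock(nch2);
                        cache_resolve(nch2, cred2);
                }
        }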
+ * + * XXX MPSAFE */ if (error == 0) { for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { diff --git a/sys/kern/vfs_vfsops.c b/sys/kern/vfs_vfsops.c index 10d87bd31c..e843ca9cc8 100644 --- a/sys/kern/vfs_vfsops.c +++ b/sys/kern/vfs_vfsops.c @@ -80,29 +80,6 @@ #include #include -#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe - -#define VFS_MPLOCK(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE) - -#define VFS_MPLOCK_FLAG(mp, flag) \ - do { \ - if (mp->mnt_kern_flag & flag) { \ - xlock_mpsafe = 1; \ - } else { \ - get_mplock(); /* TEMPORARY */ \ - lwkt_gettoken(&xlock, &mp->mnt_token); \ - xlock_mpsafe = 0; \ - } \ - } while (0) - -#define VFS_MPUNLOCK(mp) \ - do { \ - if (xlock_mpsafe == 0) { \ - lwkt_reltoken(&xlock); \ - rel_mplock(); /* TEMPORARY */ \ - } \ - } while(0) - /* * MPSAFE */ @@ -112,7 +89,7 @@ vfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_mount)(mp, path, data, cred); VFS_MPUNLOCK(mp); return (error); @@ -127,7 +104,7 @@ vfs_start(struct mount *mp, int flags) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_start)(mp, flags); VFS_MPUNLOCK(mp); return (error); @@ -142,7 +119,7 @@ vfs_unmount(struct mount *mp, int mntflags) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_unmount)(mp, mntflags); VFS_MPUNLOCK(mp); return (error); @@ -157,7 +134,7 @@ vfs_root(struct mount *mp, struct vnode **vpp) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_root)(mp, vpp); VFS_MPUNLOCK(mp); return (error); @@ -173,7 +150,7 @@ vfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t arg, VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_quotactl)(mp, cmds, uid, arg, cred); VFS_MPUNLOCK(mp); return (error); @@ -188,7 +165,7 @@ vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_statfs)(mp, sbp, cred); VFS_MPUNLOCK(mp); return (error); @@ -200,7 +177,7 @@ vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_statvfs)(mp, sbp, cred); VFS_MPUNLOCK(mp); return (error); @@ -215,7 +192,7 @@ vfs_sync(struct mount *mp, int waitfor) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_sync)(mp, waitfor); VFS_MPUNLOCK(mp); return (error); @@ -230,7 +207,7 @@ vfs_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_vget)(mp, dvp, ino, vpp); VFS_MPUNLOCK(mp); return (error); @@ -246,7 +223,7 @@ vfs_fhtovp(struct mount *mp, struct vnode *rootvp, VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_fhtovp)(mp, rootvp, fhp, vpp); VFS_MPUNLOCK(mp); return (error); @@ -262,7 +239,7 @@ vfs_checkexp(struct mount *mp, struct sockaddr *nam, VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_checkexp)(mp, nam, extflagsp, credanonp); VFS_MPUNLOCK(mp); return (error); @@ -277,7 +254,7 @@ vfs_vptofh(struct vnode *vp, struct fid *fhp) VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); error = (vp->v_mount->mnt_op->vfs_vptofh)(vp, 
fhp); VFS_MPUNLOCK(vp->v_mount); return (error); @@ -323,7 +300,7 @@ vfs_extattrctl(struct mount *mp, int cmd, const char *attrname, VFS_MPLOCK_DECLARE; int error; - VFS_MPLOCK(mp); + VFS_MPLOCK1(mp); error = (mp->mnt_op->vfs_extattrctl)(mp, cmd, attrname, arg, cred); VFS_MPUNLOCK(mp); return (error); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 062a042477..899eb71292 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -373,6 +373,8 @@ ncp_writechk(struct nchandle *nch) /* * Vnode close call + * + * MPSAFE */ int vn_close(struct vnode *vp, int flags) @@ -388,6 +390,11 @@ vn_close(struct vnode *vp, int flags) return (error); } +/* + * Sequential heuristic. + * + * MPSAFE (f_seqcount and f_nextoff are allowed to race) + */ static __inline int sequential_heuristic(struct uio *uio, struct file *fp) @@ -400,12 +407,7 @@ sequential_heuristic(struct uio *uio, struct file *fp) if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || uio->uio_offset == fp->f_nextoff) { int tmpseq = fp->f_seqcount; - /* - * XXX we assume that the filesystem block size is - * the default. Not true, but still gives us a pretty - * good indicator of how sequential the read operations - * are. - */ + tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE; if (tmpseq > IO_SEQMAX) tmpseq = IO_SEQMAX; @@ -432,6 +434,8 @@ sequential_heuristic(struct uio *uio, struct file *fp) * These routines serve the dual purpose of serializing access to the * f_offset field (at least on i386) and guaranteeing operational integrity * when multiple read()ers and write()ers are present on the same fp. + * + * MPSAFE */ static __inline off_t vn_get_fpf_offset(struct file *fp) @@ -465,6 +469,9 @@ vn_get_fpf_offset(struct file *fp) return(fp->f_offset); } +/* + * MPSAFE + */ static __inline void vn_set_fpf_offset(struct file *fp, off_t offset) { @@ -490,6 +497,9 @@ vn_set_fpf_offset(struct file *fp, off_t offset) } } +/* + * MPSAFE + */ static __inline off_t vn_poll_fpf_offset(struct file *fp) { @@ -504,6 +514,8 @@ vn_poll_fpf_offset(struct file *fp) /* * Package up an I/O request on a vnode into a uio and do it. + * + * MPSAFE */ int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, @@ -550,6 +562,8 @@ vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, * check bwillwrite() before calling vn_rdwr(). We also call uio_yield() * to give other processes a chance to lock the vnode (either other processes * core'ing the same binary, or unrelated processes scanning the directory). + * + * MPSAFE */ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, @@ -582,7 +596,7 @@ vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, } } error = vn_rdwr(rw, vp, base, chunk, offset, segflg, - ioflg, cred, aresid); + ioflg, cred, aresid); len -= chunk; /* aresid calc already includes length */ if (error) break; @@ -596,12 +610,12 @@ vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, } /* - * MPSAFE - acquires mplock - * * File pointers can no longer get ripped up by revoke so * we don't need to lock access to the vp. 
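For the sequential_heuristic() hunk above: with the stale XXX comment gone, the surviving arithmetic reads tmpseq += (uio_resid + BKVASIZE - 1) / BKVASIZE, clamped to IO_SEQMAX. As a worked example, assuming the customary 16KB BKVASIZE, a sequential 64KB read adds (65536 + 16383) / 16384 = 4 to f_seqcount, so large streaming transfers ramp the read-ahead score four times faster than 16KB ones do.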
* * f_offset updates are not guaranteed against multiple readers + * + * MPSAFE */ static int vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) @@ -645,7 +659,7 @@ vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) } /* - * MPSAFE - acquires mplock + * MPSAFE */ static int vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags) @@ -866,8 +880,6 @@ vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred, int error; off_t size; - get_mplock(); - switch (vp->v_type) { case VREG: case VDIR: @@ -915,10 +927,12 @@ vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred, break; } + get_mplock(); sess = p->p_session; /* Do nothing if reassigning same control tty */ if (sess->s_ttyvp == vp) { error = 0; + rel_mplock(); break; } @@ -928,24 +942,22 @@ vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred, sess->s_ttyvp = vp; if (ovp) vrele(ovp); + rel_mplock(); } break; } - rel_mplock(); return (error); } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ static int vn_poll(struct file *fp, int events, struct ucred *cred) { int error; - get_mplock(); error = VOP_POLL(((struct vnode *)fp->f_data), events, cred); - rel_mplock(); return (error); } @@ -987,12 +999,18 @@ debug_vn_lock(struct vnode *vp, int flags, const char *filename, int line) return (error); } +/* + * MPSAFE + */ void vn_unlock(struct vnode *vp) { lockmgr(&vp->v_lock, LK_RELEASE); } +/* + * MPSAFE + */ int vn_islocked(struct vnode *vp) { @@ -1000,30 +1018,26 @@ vn_islocked(struct vnode *vp) } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ static int vn_closefile(struct file *fp) { int error; - get_mplock(); fp->f_ops = &badfileops; error = vn_close(((struct vnode *)fp->f_data), fp->f_flag); - rel_mplock(); return (error); } /* - * MPALMOSTSAFE - acquires mplock + * MPSAFE */ static int vn_kqfilter(struct file *fp, struct knote *kn) { int error; - get_mplock(); error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn); - rel_mplock(); return (error); } diff --git a/sys/kern/vfs_vopops.c b/sys/kern/vfs_vopops.c index 45e2eb919e..ae116c4ee1 100644 --- a/sys/kern/vfs_vopops.c +++ b/sys/kern/vfs_vopops.c @@ -149,29 +149,6 @@ VNODEOP_DESC_INIT(nrename); #define DO_OPS(ops, error, ap, vop_field) \ error = ops->vop_field(ap); -#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe - -#define VFS_MPLOCK(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE) - -#define VFS_MPLOCK_FLAG(mp, flag) \ - do { \ - if (mp->mnt_kern_flag & flag) { \ - xlock_mpsafe = 1; \ - } else { \ - get_mplock(); /* TEMPORARY */ \ - lwkt_gettoken(&xlock, &mp->mnt_token); \ - xlock_mpsafe = 0; \ - } \ - } while(0) - -#define VFS_MPUNLOCK(mp) \ - do { \ - if (xlock_mpsafe == 0) { \ - lwkt_reltoken(&xlock); \ - rel_mplock(); /* TEMPORARY */ \ - } \ - } while(0) - /************************************************************************ * PRIMARY HIGH LEVEL VNODE OPERATIONS CALLS * ************************************************************************ @@ -203,7 +180,7 @@ vop_old_lookup(struct vop_ops *ops, struct vnode *dvp, ap.a_dvp = dvp; ap.a_vpp = vpp; ap.a_cnp = cnp; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_lookup); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -227,7 +204,7 @@ vop_old_create(struct vop_ops *ops, struct vnode *dvp, ap.a_cnp = cnp; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_create); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ 
-250,7 +227,7 @@ vop_old_whiteout(struct vop_ops *ops, struct vnode *dvp, ap.a_cnp = cnp; ap.a_flags = flags; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_whiteout); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -274,7 +251,7 @@ vop_old_mknod(struct vop_ops *ops, struct vnode *dvp, ap.a_cnp = cnp; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_mknod); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -295,10 +272,10 @@ vop_open(struct vop_ops *ops, struct vnode *vp, int mode, struct ucred *cred, * Decrement 3-2-1-0. Does not decrement beyond 0 */ if (vp->v_flag & VAGE0) { - vp->v_flag &= ~VAGE0; + vclrflags(vp, VAGE0); } else if (vp->v_flag & VAGE1) { - vp->v_flag &= ~VAGE1; - vp->v_flag |= VAGE0; + vclrflags(vp, VAGE1); + vsetflags(vp, VAGE0); } ap.a_head.a_desc = &vop_open_desc; @@ -308,7 +285,7 @@ vop_open(struct vop_ops *ops, struct vnode *vp, int mode, struct ucred *cred, ap.a_mode = mode; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_open); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -329,7 +306,7 @@ vop_close(struct vop_ops *ops, struct vnode *vp, int fflag) ap.a_vp = vp; ap.a_fflag = fflag; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_close); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -353,7 +330,7 @@ vop_access(struct vop_ops *ops, struct vnode *vp, int mode, int flags, ap.a_flags = flags; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_access); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -398,7 +375,7 @@ vop_setattr(struct vop_ops *ops, struct vnode *vp, struct vattr *vap, ap.a_vap = vap; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_setattr); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -472,7 +449,7 @@ vop_ioctl(struct vop_ops *ops, struct vnode *vp, u_long command, caddr_t data, ap.a_cred = cred; ap.a_sysmsg = msg; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_ioctl); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -494,7 +471,7 @@ vop_poll(struct vop_ops *ops, struct vnode *vp, int events, struct ucred *cred) ap.a_events = events; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_poll); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -515,7 +492,7 @@ vop_kqfilter(struct vop_ops *ops, struct vnode *vp, struct knote *kn) ap.a_vp = vp; ap.a_kn = kn; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_kqfilter); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -537,7 +514,7 @@ vop_mmap(struct vop_ops *ops, struct vnode *vp, int fflags, struct ucred *cred) ap.a_fflags = fflags; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_mmap); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -559,7 +536,7 @@ vop_fsync(struct vop_ops *ops, struct vnode *vp, int waitfor, int flags) ap.a_waitfor = waitfor; ap.a_flags = flags; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_fsync); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -582,7 +559,7 @@ vop_old_remove(struct vop_ops *ops, struct vnode *dvp, ap.a_vp = vp; ap.a_cnp = cnp; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_remove); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -605,7 +582,7 @@ vop_old_link(struct 
vop_ops *ops, struct vnode *tdvp, ap.a_vp = vp; ap.a_cnp = cnp; - VFS_MPLOCK(tdvp->v_mount); + VFS_MPLOCK1(tdvp->v_mount); DO_OPS(ops, error, &ap, vop_old_link); VFS_MPUNLOCK(tdvp->v_mount); return(error); @@ -632,7 +609,7 @@ vop_old_rename(struct vop_ops *ops, ap.a_tvp = tvp; ap.a_tcnp = tcnp; - VFS_MPLOCK(tdvp->v_mount); + VFS_MPLOCK1(tdvp->v_mount); DO_OPS(ops, error, &ap, vop_old_rename); VFS_MPUNLOCK(tdvp->v_mount); return(error); @@ -656,7 +633,7 @@ vop_old_mkdir(struct vop_ops *ops, struct vnode *dvp, ap.a_cnp = cnp; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_mkdir); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -679,7 +656,7 @@ vop_old_rmdir(struct vop_ops *ops, struct vnode *dvp, ap.a_vp = vp; ap.a_cnp = cnp; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_rmdir); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -705,7 +682,7 @@ vop_old_symlink(struct vop_ops *ops, struct vnode *dvp, ap.a_vap = vap; ap.a_target = target; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_old_symlink); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -731,7 +708,7 @@ vop_readdir(struct vop_ops *ops, struct vnode *vp, struct uio *uio, ap.a_ncookies = ncookies; ap.a_cookies = cookies; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_readdir); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -754,7 +731,7 @@ vop_readlink(struct vop_ops *ops, struct vnode *vp, struct uio *uio, ap.a_uio = uio; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_readlink); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -774,7 +751,7 @@ vop_inactive(struct vop_ops *ops, struct vnode *vp) ap.a_head.a_ops = ops; ap.a_vp = vp; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK_FLAG(vp->v_mount, MNTK_IN_MPSAFE); DO_OPS(ops, error, &ap, vop_inactive); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -794,7 +771,7 @@ vop_reclaim(struct vop_ops *ops, struct vnode *vp) ap.a_head.a_ops = ops; ap.a_vp = vp; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_reclaim); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -820,7 +797,7 @@ vop_bmap(struct vop_ops *ops, struct vnode *vp, off_t loffset, ap.a_runb = runb; ap.a_cmd = cmd; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_bmap); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -841,7 +818,7 @@ vop_strategy(struct vop_ops *ops, struct vnode *vp, struct bio *bio) ap.a_vp = vp; ap.a_bio = bio; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_strategy); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -861,7 +838,7 @@ vop_print(struct vop_ops *ops, struct vnode *vp) ap.a_head.a_ops = ops; ap.a_vp = vp; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_print); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -884,7 +861,7 @@ vop_pathconf(struct vop_ops *ops, struct vnode *vp, int name, ap.a_name = name; ap.a_retval = retval; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_pathconf); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -909,7 +886,7 @@ vop_advlock(struct vop_ops *ops, struct vnode *vp, caddr_t id, int op, ap.a_fl = fl; ap.a_flags = flags; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_advlock); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -936,7 +913,7 @@ vop_balloc(struct vop_ops *ops, struct vnode 
*vp, off_t startoffset, ap.a_flags = flags; ap.a_bpp = bpp; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_balloc); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -958,7 +935,7 @@ vop_reallocblks(struct vop_ops *ops, struct vnode *vp, ap.a_vp = vp; ap.a_buflist = buflist; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_reallocblks); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -983,7 +960,7 @@ vop_getpages(struct vop_ops *ops, struct vnode *vp, vm_page_t *m, int count, ap.a_reqpage = reqpage; ap.a_offset = offset; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_getpages); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1009,7 +986,7 @@ vop_putpages(struct vop_ops *ops, struct vnode *vp, vm_page_t *m, int count, ap.a_rtvals = rtvals; ap.a_offset = offset; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_putpages); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1031,7 +1008,7 @@ vop_freeblks(struct vop_ops *ops, struct vnode *vp, off_t offset, int length) ap.a_offset = offset; ap.a_length = length; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_freeblks); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1055,7 +1032,7 @@ vop_getacl(struct vop_ops *ops, struct vnode *vp, acl_type_t type, ap.a_aclp = aclp; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_getacl); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1079,7 +1056,7 @@ vop_setacl(struct vop_ops *ops, struct vnode *vp, acl_type_t type, ap.a_aclp = aclp; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_setacl); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1103,7 +1080,7 @@ vop_aclcheck(struct vop_ops *ops, struct vnode *vp, acl_type_t type, ap.a_aclp = aclp; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_aclcheck); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1127,7 +1104,7 @@ vop_getextattr(struct vop_ops *ops, struct vnode *vp, char *name, ap.a_uio = uio; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_getextattr); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1151,7 +1128,7 @@ vop_setextattr(struct vop_ops *ops, struct vnode *vp, char *name, ap.a_uio = uio; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_setextattr); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1178,7 +1155,7 @@ vop_mountctl(struct vop_ops *ops, struct vnode *vp, int op, struct file *fp, ap.a_buflen = buflen; ap.a_res = res; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_mountctl); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1199,7 +1176,7 @@ vop_markatime(struct vop_ops *ops, struct vnode *vp, struct ucred *cred) ap.a_vp = vp; ap.a_cred = cred; - VFS_MPLOCK(vp->v_mount); + VFS_MPLOCK1(vp->v_mount); DO_OPS(ops, error, &ap, vop_markatime); VFS_MPUNLOCK(vp->v_mount); return(error); @@ -1230,7 +1207,7 @@ vop_nresolve(struct vop_ops *ops, struct nchandle *nch, ap.a_dvp = dvp; ap.a_cred = cred; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nresolve); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1258,7 +1235,7 @@ vop_nlookupdotdot(struct vop_ops *ops, struct vnode *dvp, ap.a_cred = cred; ap.a_fakename = fakename; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); 
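All of the wrappers in this file now share one dispatch shape, and the only special case is vop_inactive(), which keys on MNTK_IN_MPSAFE instead of the blanket MNTK_MPSAFE (see its hunk above). Per the sys/mount.h hunk below, VFS_MPLOCK1(mp) is VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE), so each wrapper's guard expands to roughly:

        if (mp->mnt_kern_flag & MNTK_MPSAFE) {
                xlock_mpsafe = 1;       /* run the vop without the MP lock */
        } else {
                get_mplock();           /* TEMPORARY */
                lwkt_gettoken(&xlock, &mp->mnt_token);
                xlock_mpsafe = 0;
        }

This is what lets a filesystem declare individual vop classes MPSAFE while everything else keeps the old single-threaded behavior.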
DO_OPS(ops, error, &ap, vop_nlookupdotdot); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1292,7 +1269,7 @@ vop_ncreate(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_cred = cred; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_ncreate); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1326,7 +1303,7 @@ vop_nmkdir(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_cred = cred; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nmkdir); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1360,7 +1337,7 @@ vop_nmknod(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_cred = cred; ap.a_vap = vap; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nmknod); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1394,7 +1371,7 @@ vop_nlink(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_vp = vp; ap.a_cred = cred; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nlink); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1431,7 +1408,7 @@ vop_nsymlink(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_vap = vap; ap.a_target = target; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nsymlink); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1463,7 +1440,7 @@ vop_nwhiteout(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_cred = cred; ap.a_flags = flags; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nwhiteout); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1494,7 +1471,7 @@ vop_nremove(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_dvp = dvp; ap.a_cred = cred; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nremove); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1525,7 +1502,7 @@ vop_nrmdir(struct vop_ops *ops, struct nchandle *nch, struct vnode *dvp, ap.a_dvp = dvp; ap.a_cred = cred; - VFS_MPLOCK(dvp->v_mount); + VFS_MPLOCK1(dvp->v_mount); DO_OPS(ops, error, &ap, vop_nrmdir); VFS_MPUNLOCK(dvp->v_mount); return(error); @@ -1563,7 +1540,7 @@ vop_nrename(struct vop_ops *ops, ap.a_tdvp = tdvp; ap.a_cred = cred; - VFS_MPLOCK(fdvp->v_mount); + VFS_MPLOCK1(fdvp->v_mount); DO_OPS(ops, error, &ap, vop_nrename); VFS_MPUNLOCK(fdvp->v_mount); return(error); diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 53cc386363..b07f2cf0fd 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -934,6 +934,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, pmap_inval_info_t info) */ vm_page_busy(m); pmap_inval_add(info, pmap, -1); + KKASSERT(pmap->pm_pdir[m->pindex]); pmap->pm_pdir[m->pindex] = 0; KKASSERT(pmap->pm_stats.resident_count > 0); @@ -1153,8 +1154,9 @@ pmap_release_free_page(struct pmap *pmap, vm_page_t p) /* * Remove the page table page from the processes address space. 
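The pmap hunks here and immediately below add cheap teardown assertions. loadandclear() atomically exchanges the PTE with zero, so asserting that the old value was nonzero turns a silent double-remove of a page-table entry into an immediate panic:

        oldpte = loadandclear(ptq);     /* atomic PTE -> 0 exchange */
        KKASSERT(oldpte);               /* entry must have been present */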
*/ - pde[p->pindex] = 0; KKASSERT(pmap->pm_stats.resident_count > 0); + KKASSERT(pde[p->pindex]); + pde[p->pindex] = 0; --pmap->pm_stats.resident_count; if (p->hold_count) { @@ -1604,6 +1606,7 @@ pmap_remove_pte(struct pmap *pmap, unsigned *ptq, vm_offset_t va, pmap_inval_add(info, pmap, va); oldpte = loadandclear(ptq); + KKASSERT(oldpte); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* @@ -2850,6 +2853,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) npv = TAILQ_NEXT(pv, pv_plist); continue; } + KKASSERT(*pte); tpte = loadandclear(pte); m = PHYS_TO_VM_PAGE(tpte); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 9908890196..399a6ea7c2 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -286,9 +286,10 @@ struct mount { */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_MPSAFE 0x00010000 /* call vops without mnt_token lock */ -#define MNTK_RD_MPSAFE 0x00020000 /* reads do not require mnt_token */ -#define MNTK_WR_MPSAFE 0x00040000 /* writes do not require mnt_token */ -#define MNTK_GA_MPSAFE 0x00080000 /* getattrs do not require mnt_token */ +#define MNTK_RD_MPSAFE 0x00020000 /* vop_read is MPSAFE */ +#define MNTK_WR_MPSAFE 0x00040000 /* vop_write is MPSAFE */ +#define MNTK_GA_MPSAFE 0x00080000 /* vop_getattr is MPSAFE */ +#define MNTK_IN_MPSAFE 0x00100000 /* vop_inactive is MPSAFE */ #define MNTK_NCALIASED 0x00800000 /* namecached aliased */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ @@ -325,6 +326,41 @@ struct mount { as next argument */ /* + * VFS MPLOCK helper. + */ +#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe + +#define VFS_MPLOCK1(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE) + +#define VFS_MPLOCK2(mp) \ + do { \ + if (xlock_mpsafe) { \ + get_mplock(); /* TEMPORARY */ \ + lwkt_gettoken(&xlock, &mp->mnt_token); \ + xlock_mpsafe = 0; \ + } \ + } while(0) + +#define VFS_MPLOCK_FLAG(mp, flag) \ + do { \ + if (mp->mnt_kern_flag & flag) { \ + xlock_mpsafe = 1; \ + } else { \ + get_mplock(); /* TEMPORARY */ \ + lwkt_gettoken(&xlock, &mp->mnt_token); \ + xlock_mpsafe = 0; \ + } \ + } while(0) + +#define VFS_MPUNLOCK(mp) \ + do { \ + if (xlock_mpsafe == 0) { \ + lwkt_reltoken(&xlock); \ + rel_mplock(); /* TEMPORARY */ \ + } \ + } while(0) + +/* * Flags for various system call interfaces. * * waitfor flags to vfs_sync() and getfsstat() diff --git a/sys/sys/namecache.h b/sys/sys/namecache.h index 82189f6ca5..b970b8fb13 100644 --- a/sys/sys/namecache.h +++ b/sys/sys/namecache.h @@ -92,9 +92,10 @@ TAILQ_HEAD(namecache_list, namecache); * vnodes cached by the system will reference one or more associated namecache * structures. * - * The namecache is disjoint, there may not always be a path to the system - * root through nc_parent links. If a namecache entry has no parent, that - * entry will not be hashed and can only be 'found' via '.' or '..'. + * The DragonFly namecache maintains elements from active nodes to the root + * in all but the NFS server case and the removed file/directory case. + * NFS servers use fhtovp() and may have to regenerate the topology to + * the leaf on the fly. * * Because the namecache structure maintains the path through mount points, * null, and union mounts, and other VFS overlays, several namecache @@ -108,6 +109,10 @@ TAILQ_HEAD(namecache_list, namecache); * confusion, but only the one representing the physical directory is passed * into lower layer VOP calls. 
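The structure changes below retire nc_lockreq, widen nc_exlocks to u_int, and carve an NC_EXLOCK_REQ request bit out of it. A deliberately simplified, hypothetical sketch of the lock/unlock protocol this enables (the real cache_lock()/cache_unlock() in vfs_cache.c also handle reentrancy via nc_locktd, timeouts, and diagnostics; tsleep_interlock()/PINTERLOCKED are assumed available):

        static void
        _cache_lock_sketch(struct namecache *ncp)       /* hypothetical */
        {
                u_int count;

                for (;;) {
                        count = ncp->nc_exlocks;
                        if (count == 0) {
                                /* free: try 0 -> 1 */
                                if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
                                        ncp->nc_locktd = curthread;
                                        return;
                                }
                                continue;
                        }
                        /* held: post the request bit, then sleep */
                        tsleep_interlock(ncp, 0);
                        if (atomic_cmpset_int(&ncp->nc_exlocks, count,
                                              count | NC_EXLOCK_REQ)) {
                                tsleep(ncp, PINTERLOCKED, "nclock", 0);
                        }
                }
        }

        static void
        _cache_unlock_sketch(struct namecache *ncp)     /* hypothetical */
        {
                ncp->nc_locktd = NULL;  /* clear owner before dropping */
                if (atomic_cmpset_int(&ncp->nc_exlocks, 1, 0) == 0) {
                        /* NC_EXLOCK_REQ was posted: clear and wake waiters */
                        ncp->nc_exlocks = 0;
                        wakeup(ncp);
                }
        }

The cmpset-with-interlock pairing is what closes the busy/tsleep race: a waiter that fails to post NC_EXLOCK_REQ because the lock state changed underneath it simply loops instead of sleeping on a wakeup that will never come.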
* + * ncp locking is done using atomic ops on nc_exlocks, including a request + * flag for waiters. nc_locktd is set after locking or cleared before + * the last unlock. ncp locks are reentrant. + * * Many new API VOP operations do not pass vnodes. In these cases the * operations vector is typically obtained via nc_mount->mnt_vn_use_ops. */ @@ -122,11 +127,11 @@ struct namecache { int nc_refs; /* ref count prevents deletion */ u_short nc_flag; u_char nc_nlen; /* The length of the name, 255 max */ - u_char nc_lockreq; + u_char nc_unused; char *nc_name; /* Separately allocated seg name */ int nc_error; int nc_timeout; /* compared against ticks, or 0 */ - int nc_exlocks; /* namespace locking */ + u_int nc_exlocks; /* namespace locking */ struct thread *nc_locktd; /* namespace locking */ long nc_namecache_gen; /* cmp against mnt_namecache_gen */ }; @@ -140,6 +145,8 @@ struct nchandle { struct mount *mount; /* mount pt (possible overlay) */ }; +#define ASSERT_NCH_LOCKED(nch) KKASSERT(nch->ncp->nc_locktd == curthread) + /* * Flags in namecache.nc_flag (u_char) */ @@ -156,6 +163,8 @@ struct nchandle { #define NCF_DESTROYED 0x0400 /* name association is considered destroyed */ #define NCF_UNUSED800 0x0800 +#define NC_EXLOCK_REQ 0x80000000 /* ex_lock state */ + /* * cache_inval[_vp]() flags */ @@ -163,15 +172,57 @@ struct nchandle { #define CINV_UNUSED02 0x0002 #define CINV_CHILDREN 0x0004 /* recursively set children to unresolved */ +/* + * MP lock helper for namecache. + * + * CACHE_GETMPLOCK1() Conditionally gets the MP lock if cache_mpsafe + * is not set, otherwise does not. + * + * CACHE_GETMPLOCK2() Unconditionally gets the MP lock if it is not already + * held (e.g. from GETMPLOCK1). + * + * CACHE_RELMPLOCK() Releases the MP lock if it was previously acquired + * by GETMPLOCK1 or GETMPLOCK2. 
+ */ +#define CACHE_MPLOCK_DECLARE int have_mplock + +#define CACHE_GETMPLOCK1() \ + do { \ + if (cache_mpsafe) { \ + have_mplock = 0; \ + } else { \ + get_mplock(); \ + have_mplock = 1; \ + } \ + } while (0) + +#define CACHE_GETMPLOCK2() \ + do { \ + if (have_mplock == 0) { \ + have_mplock = 1; \ + get_mplock(); \ + } \ + } while(0) + +#define CACHE_RELMPLOCK() \ + do { \ + if (have_mplock) { \ + have_mplock = 0; \ + rel_mplock(); \ + } \ + } while(0) + #ifdef _KERNEL -extern struct lwkt_token vfs_token; +extern int cache_mpsafe; struct componentname; struct nlcomponent; struct mount; void cache_lock(struct nchandle *nch); +void cache_relock(struct nchandle *nch1, struct ucred *cred1, + struct nchandle *nch2, struct ucred *cred2); int cache_lock_nonblock(struct nchandle *nch); void cache_unlock(struct nchandle *nch); void cache_setvp(struct nchandle *nch, struct vnode *vp); diff --git a/sys/vfs/devfs/devfs_core.c b/sys/vfs/devfs/devfs_core.c index cb0b72f21b..91caad68e1 100644 --- a/sys/vfs/devfs/devfs_core.c +++ b/sys/vfs/devfs/devfs_core.c @@ -315,7 +315,8 @@ try_again: switch (node->node_type) { case Proot: - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); + /* fall through */ case Pdir: vp->v_type = VDIR; break; diff --git a/sys/vfs/devfs/devfs_vnops.c b/sys/vfs/devfs/devfs_vnops.c index 0398422047..ee6c5615e5 100644 --- a/sys/vfs/devfs/devfs_vnops.c +++ b/sys/vfs/devfs/devfs_vnops.c @@ -827,7 +827,7 @@ devfs_spec_open(struct vop_open_args *ap) dev->si_iosize_max = DFLTPHYS; if (dev_dflags(dev) & D_TTY) - vp->v_flag |= VISTTY; + vsetflags(vp, VISTTY); vn_unlock(vp); error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred); diff --git a/sys/vfs/fdesc/fdesc_vfsops.c b/sys/vfs/fdesc/fdesc_vfsops.c index 85cba431df..70477eb707 100644 --- a/sys/vfs/fdesc/fdesc_vfsops.c +++ b/sys/vfs/fdesc/fdesc_vfsops.c @@ -95,7 +95,7 @@ fdesc_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount), M_FDESCMNT, M_WAITOK); /* XXX */ rvp->v_type = VDIR; - rvp->v_flag |= VROOT; + vsetflags(rvp, VROOT); fmp->f_root = rvp; /* XXX -- don't mark as local to work around fts() problems */ /*mp->mnt_flag |= MNT_LOCAL;*/ diff --git a/sys/vfs/fifofs/fifo_vnops.c b/sys/vfs/fifofs/fifo_vnops.c index 1b5ac40a97..3fb1f6bf26 100644 --- a/sys/vfs/fifofs/fifo_vnops.c +++ b/sys/vfs/fifofs/fifo_vnops.c @@ -256,7 +256,7 @@ fifo_open(struct vop_open_args *ap) } } } - vp->v_flag |= VNOTSEEKABLE; + vsetflags(vp, VNOTSEEKABLE); error = vop_stdopen(ap); lwkt_reltoken(&vlock); return (error); diff --git a/sys/vfs/gnu/ext2fs/ext2_quota.c b/sys/vfs/gnu/ext2fs/ext2_quota.c index 46432d7c48..426d98ec14 100644 --- a/sys/vfs/gnu/ext2fs/ext2_quota.c +++ b/sys/vfs/gnu/ext2fs/ext2_quota.c @@ -401,7 +401,7 @@ ext2_quotaon(struct ucred *cred, struct mount *mp, int type, caddr_t fname) ext2_quotaoff(mp, type); ump->um_qflags[type] |= QTF_OPENING; mp->mnt_flag |= MNT_QUOTA; - vp->v_flag |= VSYSTEM; + vsetflags(vp, VSYSTEM); *vpp = vp; /* XXX release duplicate vp if *vpp == vp? 
*/ /* @@ -478,7 +478,7 @@ ext2_quotaoff(struct mount *mp, int type) vmntvnodescan(mp, VMSC_GETVP, NULL, ext2_quotaoff_scan, &scaninfo); } ext2_dqflush(qvp); - qvp->v_flag &= ~VSYSTEM; + vclrflags(qvp, VSYSTEM); error = vn_close(qvp, FREAD|FWRITE); ump->um_quotas[type] = NULLVP; crfree(ump->um_cred[type]); diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 8b969afd07..22c17a787e 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -60,6 +60,7 @@ #include #include +#include #include "hammer_disk.h" #include "hammer_mount.h" #include "hammer_ioctl.h" diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 49a7763422..00cd207d79 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -36,8 +36,6 @@ #include "hammer.h" #include -#include -#include static int hammer_unload_inode(struct hammer_inode *ip); static void hammer_free_inode(hammer_inode_t ip); @@ -160,6 +158,8 @@ RB_GENERATE2(hammer_pfs_rb_tree, hammer_pseudofs_inmem, rb_node, * it cached. * * This is called from the frontend. + * + * MPALMOSTSAFE */ int hammer_vop_inactive(struct vop_inactive_args *ap) @@ -186,11 +186,13 @@ hammer_vop_inactive(struct vop_inactive_args *ap) * otherwise namespace calls such as chmod will unnecessarily generate * multiple inode updates. */ - hammer_inode_unloadable_check(ip, 0); if (ip->ino_data.nlinks == 0) { + get_mplock(); + hammer_inode_unloadable_check(ip, 0); if (ip->flags & HAMMER_INODE_MODMASK) hammer_flush_inode(ip, 0); vrecycle(ap->a_vp); + rel_mplock(); } return(0); } @@ -294,9 +296,9 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp) if (ip->obj_id == HAMMER_OBJID_ROOT && ip->obj_asof == hmp->asof) { if (ip->obj_localization == 0) - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); else - vp->v_flag |= VPFSROOT; + vsetflags(vp, VPFSROOT); } vp->v_data = (void *)ip; diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 193c5de499..347d0dfcd2 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -582,7 +582,8 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, * on return, so even if we do not specify it we no longer get * the BGL regardlless of how we are flagged. 
*/ - mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE; + mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | + MNTK_IN_MPSAFE; /* * note: f_iosize is used by vnode_pager_haspage() when constructing diff --git a/sys/vfs/hpfs/hpfs_vfsops.c b/sys/vfs/hpfs/hpfs_vfsops.c index a2627919e0..30bd14dec1 100644 --- a/sys/vfs/hpfs/hpfs_vfsops.c +++ b/sys/vfs/hpfs/hpfs_vfsops.c @@ -498,7 +498,7 @@ hpfs_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) vp->v_data = hp; if (ino == (ino_t)hpmp->hpm_su.su_rootfno) - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); lwkt_token_init(&hp->h_interlock); diff --git a/sys/vfs/isofs/cd9660/cd9660_vfsops.c b/sys/vfs/isofs/cd9660/cd9660_vfsops.c index 9c09c56ba2..bff1fac288 100644 --- a/sys/vfs/isofs/cd9660/cd9660_vfsops.c +++ b/sys/vfs/isofs/cd9660/cd9660_vfsops.c @@ -880,7 +880,7 @@ again: } if (ip->iso_extent == imp->root_extent) - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); /* * Return the locked and refd vp diff --git a/sys/vfs/msdosfs/msdosfs_denode.c b/sys/vfs/msdosfs/msdosfs_denode.c index e344a8383e..cd0cdb1191 100644 --- a/sys/vfs/msdosfs/msdosfs_denode.c +++ b/sys/vfs/msdosfs/msdosfs_denode.c @@ -362,7 +362,7 @@ again: * exists), and then use the time and date from that entry * as the time and date for the root denode. */ - nvp->v_flag |= VROOT; /* should be further down XXX */ + vsetflags(nvp, VROOT); /* should be further down XXX */ ldep->de_Attributes = ATTR_DIRECTORY; ldep->de_LowerCase = 0; diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index 4a194e7c74..01a1c14abb 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -1148,8 +1148,11 @@ nfs_namei(struct nlookupdata *nd, struct ucred *cred, int nflags, if (nd->nl_nch.ncp->nc_parent) { nch = nd->nl_nch; nch.ncp = nch.ncp->nc_parent; + cache_hold(&nch); + cache_lock(&nch); error = cache_vget(&nch, nd->nl_cred, LK_EXCLUSIVE, dvpp); + cache_put(&nch); } else { error = ENXIO; } diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c index 37bd8f306b..88431aafd7 100644 --- a/sys/vfs/nfs/nfs_vfsops.c +++ b/sys/vfs/nfs/nfs_vfsops.c @@ -664,7 +664,7 @@ nfs_mountroot(struct mount *mp) * Since the swap file is not the root dir of a file system, * hack it to a regular file. 
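The hammer_vop_inactive() hunk above is the MPALMOSTSAFE idiom in miniature: the common case runs without the MP lock, and get_mplock() brackets only the unlink path that still needs it. Reduced to its shape (identifiers as in the hunk above):

        /*
         * Common case: nothing to tear down, stay MP lock free.  Only a
         * last-unlink needs the heavier machinery, so only it pays for
         * the MP lock.
         */
        if (ip->ino_data.nlinks == 0) {
                get_mplock();
                hammer_inode_unloadable_check(ip, 0);
                if (ip->flags & HAMMER_INODE_MODMASK)
                        hammer_flush_inode(ip, 0);
                vrecycle(ap->a_vp);
                rel_mplock();
        }
        return(0);

That is also what allows hammer_vfs_mount() above to set MNTK_IN_MPSAFE: vop_inactive() is entered without the MP lock, and HAMMER reacquires it only where required.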
*/ - vp->v_flag &= ~VROOT; + vclrflags(vp, VROOT); vref(vp); nfs_setvtype(vp, VREG); swaponvp(td, vp, nd->swap_nblks); @@ -1258,7 +1258,7 @@ nfs_root(struct mount *mp, struct vnode **vpp) } if (vp->v_type == VNON) nfs_setvtype(vp, VDIR); - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); if (error) vput(vp); else diff --git a/sys/vfs/ntfs/ntfs_vfsops.c b/sys/vfs/ntfs/ntfs_vfsops.c index c20db7b071..d3d80d2e3a 100644 --- a/sys/vfs/ntfs/ntfs_vfsops.c +++ b/sys/vfs/ntfs/ntfs_vfsops.c @@ -503,7 +503,7 @@ ntfs_mountfs(struct vnode *devvp, struct mount *mp, struct ntfs_args *argsp, pi[i], &(ntmp->ntm_sysvn[pi[i]])); if(error) goto out1; - ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM; + vsetflags(ntmp->ntm_sysvn[pi[i]], VSYSTEM); vref(ntmp->ntm_sysvn[pi[i]]); vput(ntmp->ntm_sysvn[pi[i]]); } @@ -906,7 +906,7 @@ ntfs_vgetex(struct mount *mp, ino_t ino, u_int32_t attrtype, char *attrname, vp->v_type = f_type; if (ino == NTFS_ROOTINO) - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); /* * Normal files use the buffer cache diff --git a/sys/vfs/nwfs/nwfs_vfsops.c b/sys/vfs/nwfs/nwfs_vfsops.c index d178314147..66a05a3c10 100644 --- a/sys/vfs/nwfs/nwfs_vfsops.c +++ b/sys/vfs/nwfs/nwfs_vfsops.c @@ -331,7 +331,7 @@ nwfs_root(struct mount *mp, struct vnode **vpp) error = nwfs_nget(mp, nmp->n_rootent, &fattr, NULL, &vp); if (error) return (error); - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); np = VTONW(vp); if (nmp->m.root_path[0] == 0) np->n_flag |= NVOLUME; diff --git a/sys/vfs/portal/portal_vfsops.c b/sys/vfs/portal/portal_vfsops.c index da22adc829..937c363a5a 100644 --- a/sys/vfs/portal/portal_vfsops.c +++ b/sys/vfs/portal/portal_vfsops.c @@ -123,7 +123,7 @@ portal_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) rvp->v_data = pn; rvp->v_type = VDIR; - rvp->v_flag |= VROOT; + vsetflags(rvp, VROOT); VTOPORTAL(rvp)->pt_arg = 0; VTOPORTAL(rvp)->pt_size = 0; VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID; diff --git a/sys/vfs/smbfs/smbfs_vfsops.c b/sys/vfs/smbfs/smbfs_vfsops.c index 0838269634..ddc6f53305 100644 --- a/sys/vfs/smbfs/smbfs_vfsops.c +++ b/sys/vfs/smbfs/smbfs_vfsops.c @@ -309,7 +309,7 @@ smbfs_root(struct mount *mp, struct vnode **vpp) error = smbfs_nget(mp, NULL, "TheRooT", 7, &fattr, &vp); if (error) return error; - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); np = VTOSMB(vp); smp->sm_root = np; *vpp = vp; diff --git a/sys/vfs/udf/udf_vfsops.c b/sys/vfs/udf/udf_vfsops.c index 50db59c017..fe9e3b9b12 100644 --- a/sys/vfs/udf/udf_vfsops.c +++ b/sys/vfs/udf/udf_vfsops.c @@ -446,7 +446,7 @@ udf_root(struct mount *mp, struct vnode **vpp) return(error); vp = *vpp; - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); udfmp->root_vp = vp; return(0); diff --git a/sys/vfs/ufs/ufs_quota.c b/sys/vfs/ufs/ufs_quota.c index bcf283f820..faf6af2648 100644 --- a/sys/vfs/ufs/ufs_quota.c +++ b/sys/vfs/ufs/ufs_quota.c @@ -442,7 +442,7 @@ ufs_quotaon(struct ucred *cred, struct mount *mp, int type, caddr_t fname) ufs_quotaoff(mp, type); ump->um_qflags[type] |= QTF_OPENING; mp->mnt_flag |= MNT_QUOTA; - vp->v_flag |= VSYSTEM; + vsetflags(vp, VSYSTEM); *vpp = vp; /* XXX release duplicate vp if *vpp == vp? 
*/ /* @@ -519,7 +519,7 @@ ufs_quotaoff(struct mount *mp, int type) vmntvnodescan(mp, VMSC_GETVP, NULL, ufs_quotaoff_scan, &scaninfo); } ufs_dqflush(qvp); - qvp->v_flag &= ~VSYSTEM; + vclrflags(qvp, VSYSTEM); error = vn_close(qvp, FREAD|FWRITE); ump->um_quotas[type] = NULLVP; crfree(ump->um_cred[type]); diff --git a/sys/vfs/ufs/ufs_vnops.c b/sys/vfs/ufs/ufs_vnops.c index 51947e7531..6a014a5459 100644 --- a/sys/vfs/ufs/ufs_vnops.c +++ b/sys/vfs/ufs/ufs_vnops.c @@ -1964,7 +1964,7 @@ ufs_vinit(struct mount *mntp, struct vnode **vpp) } if (ip->i_number == ROOTINO) - vp->v_flag |= VROOT; + vsetflags(vp, VROOT); /* * Initialize modrev times */ diff --git a/sys/vfs/union/union_subr.c b/sys/vfs/union/union_subr.c index 155206514e..bfda6f6caa 100644 --- a/sys/vfs/union/union_subr.c +++ b/sys/vfs/union/union_subr.c @@ -544,7 +544,7 @@ loop: MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), M_TEMP, M_WAITOK); - (*vpp)->v_flag |= vflag; + vsetflags(*vpp, vflag); if (uppervp) (*vpp)->v_type = uppervp->v_type; else diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index e71b452d7b..c4401e5afe 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -259,7 +259,7 @@ vm_object_vndeallocate(vm_object_t object) object->ref_count--; if (object->ref_count == 0) - vp->v_flag &= ~VTEXT; + vclrflags(vp, VTEXT); vrele(vp); } diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 0528caa4d3..a063460385 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -124,10 +124,10 @@ vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset) * can happen with NFS vnodes since the nfsnode isn't locked. */ while (vp->v_flag & VOLOCK) { - vp->v_flag |= VOWANT; + vsetflags(vp, VOWANT); tsleep(vp, 0, "vnpobj", 0); } - vp->v_flag |= VOLOCK; + vsetflags(vp, VOLOCK); /* * If the object is being terminated, wait for it to @@ -161,9 +161,9 @@ vnode_pager_alloc(void *handle, off_t size, vm_prot_t prot, off_t offset) } vref(vp); - vp->v_flag &= ~VOLOCK; + vclrflags(vp, VOLOCK); if (vp->v_flag & VOWANT) { - vp->v_flag &= ~VOWANT; + vclrflags(vp, VOWANT); wakeup(vp); } return (object); @@ -183,7 +183,7 @@ vnode_pager_dealloc(vm_object_t object) object->type = OBJT_DEAD; vp->v_object = NULL; vp->v_filesize = NOOFFSET; - vp->v_flag &= ~(VTEXT | VOBJBUF); + vclrflags(vp, VTEXT | VOBJBUF); } /* -- 2.11.4.GIT