From fb578eace6c2fb11150b7e39f0c516c979323d8d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 12 Oct 2017 22:59:02 -0700 Subject: [PATCH] kernel - Improve mountlist_scan() performance, track vfs_getvfs() * Use a shared token whenever possible, and do not hold the token across the callback in the mountlist_scan() call. * vfs_getvfs() mount_hold()'s the returned mp. The caller is now expected to mount_drop() it when done. This fixes a very rare race. --- sys/kern/kern_checkpoint.c | 5 ++-- sys/kern/vfs_cache.c | 5 ++-- sys/kern/vfs_mount.c | 71 ++++++++++++++++++++++++++++++++------------ sys/kern/vfs_syscalls.c | 25 ++++++++++++++-- sys/vfs/nfs/nfs_serv.c | 4 ++- sys/vfs/nfs/nfs_subs.c | 5 +++- sys/vfs/nullfs/null_vfsops.c | 3 ++ sys/vfs/ufs/ffs_vfsops.c | 8 +++-- 8 files changed, 95 insertions(+), 31 deletions(-) diff --git a/sys/kern/kern_checkpoint.c b/sys/kern/kern_checkpoint.c index 70746e7c4c..98abd500b4 100644 --- a/sys/kern/kern_checkpoint.c +++ b/sys/kern/kern_checkpoint.c @@ -495,14 +495,13 @@ ckpt_fhtovp(fhandle_t *fh, struct vnode **vpp) return ESTALE; } error = VFS_FHTOVP(mp, NULL, &fh->fh_fid, vpp); + mount_drop(mp); if (error) { PRINTF(("failed with: %d\n", error)); TRACE_ERR; - TRACE_EXIT; - return error; } TRACE_EXIT; - return 0; + return error; } static int diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index c6315dad32..1fd9266d9a 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -1614,7 +1614,8 @@ cache_clrmountpt_callback(struct mount *mp, void *data) } /* - * + * Clear NCF_ISMOUNTPT on nch->ncp if it is no longer associated + * with a mount point. */ void cache_clrmountpt(struct nchandle *nch) @@ -3421,7 +3422,7 @@ skip: info.nch_mount = nch->mount; info.nch_ncp = nch->ncp; mountlist_scan(cache_findmount_callback, &info, - MNTSCAN_FORWARD|MNTSCAN_NOBUSY); + MNTSCAN_FORWARD|MNTSCAN_NOBUSY); /* * Cache the result. diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 424c8bbc77..8cb5e038e6 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -374,16 +374,20 @@ mount_drop(struct mount *mp) /* * Lookup a mount point by filesystem identifier. + * + * If not NULL, the returned mp is held and the caller is expected to drop + * it via mount_drop(). */ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; - lwkt_gettoken(&mountlist_token); + lwkt_gettoken_shared(&mountlist_token); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + mount_hold(mp); break; } } @@ -407,6 +411,7 @@ void vfs_getnewfsid(struct mount *mp) { static u_int16_t mntid_base; + struct mount *mptmp; fsid_t tfsid; int mtype; @@ -418,8 +423,10 @@ vfs_getnewfsid(struct mount *mp) tfsid.val[0] = makeudev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; - if (vfs_getvfs(&tfsid) == NULL) + mptmp = vfs_getvfs(&tfsid); + if (mptmp == NULL) break; + mount_drop(mptmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; @@ -433,16 +440,23 @@ vfs_getnewfsid(struct mount *mp) int vfs_setfsid(struct mount *mp, fsid_t *template) { + struct mount *mptmp; int didmunge = 0; bzero(&mp->mnt_stat.f_fsid, sizeof(mp->mnt_stat.f_fsid)); + + lwkt_gettoken(&mntid_token); for (;;) { - if (vfs_getvfs(template) == NULL) + mptmp = vfs_getvfs(template); + if (mptmp == NULL) break; + mount_drop(mptmp); didmunge = 1; ++template->val[1]; } mp->mnt_stat.f_fsid = *template; + lwkt_reltoken(&mntid_token); + return(didmunge); } @@ -535,9 +549,9 @@ mountlist_insert(struct mount *mp, int how) { lwkt_gettoken(&mountlist_token); if (how == MNTINS_FIRST) - TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); + TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); else - TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); lwkt_reltoken(&mountlist_token); } @@ -547,6 +561,8 @@ mountlist_insert(struct mount *mp, int how) * Execute the specified interlock function with the mountlist token * held. The function will be called in a serialized fashion verses * other functions called through this mechanism. + * + * The function is expected to be very short-lived. */ int mountlist_interlock(int (*callback)(struct mount *), struct mount *mp) @@ -592,7 +608,8 @@ mountlist_remove(struct mount *mp) if (msi->msi_how & MNTSCAN_FORWARD) msi->msi_node = TAILQ_NEXT(mp, mnt_list); else - msi->msi_node = TAILQ_PREV(mp, mntlist, mnt_list); + msi->msi_node = TAILQ_PREV(mp, mntlist, + mnt_list); } } TAILQ_REMOVE(&mountlist, mp, mnt_list); @@ -615,7 +632,7 @@ mountlist_exists(struct mount *mp) int node_exists = 0; struct mount* lmp; - lwkt_gettoken(&mountlist_token); + lwkt_gettoken_shared(&mountlist_token); TAILQ_FOREACH(lmp, &mountlist, mnt_list) { if (lmp == mp) { node_exists = 1; @@ -623,16 +640,20 @@ mountlist_exists(struct mount *mp) } } lwkt_reltoken(&mountlist_token); + return(node_exists); } /* - * mountlist_scan (MP SAFE) + * mountlist_scan * - * Safely scan the mount points on the mount list. Unless otherwise - * specified each mount point will be busied prior to the callback and - * unbusied afterwords. The callback may safely remove any mount point - * without interfering with the scan. If the current callback + * Safely scan the mount points on the mount list. Each mountpoint + * is held across the callback. The callback is responsible for + * acquiring any further tokens or locks. + * + * Unless otherwise specified each mount point will be busied prior to the + * callback and unbusied afterwords. The callback may safely remove any + * mount point without interfering with the scan. If the current callback * mount is removed the scanner will not attempt to unbusy it. * * If a mount node cannot be busied it is silently skipped. @@ -645,10 +666,7 @@ mountlist_exists(struct mount *mp) * MNTSCAN_NOBUSY - the scanner will make the callback without busying * the mount node. * - * NOTE: mount_hold()/mount_drop() sequence primarily helps us avoid - * confusion for the unbusy check, particularly if a kfree/kmalloc - * occurs quickly (lots of processes mounting and unmounting at the - * same time). + * NOTE: mountlist_token is not held across the callback. */ int mountlist_scan(int (*callback)(struct mount *, void *), void *data, int how) @@ -659,21 +677,26 @@ mountlist_scan(int (*callback)(struct mount *, void *), void *data, int how) int res; lwkt_gettoken(&mountlist_token); - info.msi_how = how; info.msi_node = NULL; /* paranoia */ TAILQ_INSERT_TAIL(&mountscan_list, &info, msi_entry); + lwkt_reltoken(&mountlist_token); res = 0; + lwkt_gettoken_shared(&mountlist_token); if (how & MNTSCAN_FORWARD) { info.msi_node = TAILQ_FIRST(&mountlist); while ((mp = info.msi_node) != NULL) { mount_hold(mp); if (how & MNTSCAN_NOBUSY) { + lwkt_reltoken(&mountlist_token); count = callback(mp, data); + lwkt_gettoken_shared(&mountlist_token); } else if (vfs_busy(mp, LK_NOWAIT) == 0) { + lwkt_reltoken(&mountlist_token); count = callback(mp, data); + lwkt_gettoken_shared(&mountlist_token); if (mp == info.msi_node) vfs_unbusy(mp); } else { @@ -691,9 +714,13 @@ mountlist_scan(int (*callback)(struct mount *, void *), void *data, int how) while ((mp = info.msi_node) != NULL) { mount_hold(mp); if (how & MNTSCAN_NOBUSY) { + lwkt_reltoken(&mountlist_token); count = callback(mp, data); + lwkt_gettoken_shared(&mountlist_token); } else if (vfs_busy(mp, LK_NOWAIT) == 0) { + lwkt_reltoken(&mountlist_token); count = callback(mp, data); + lwkt_gettoken_shared(&mountlist_token); if (mp == info.msi_node) vfs_unbusy(mp); } else { @@ -704,11 +731,16 @@ mountlist_scan(int (*callback)(struct mount *, void *), void *data, int how) break; res += count; if (mp == info.msi_node) - info.msi_node = TAILQ_PREV(mp, mntlist, mnt_list); + info.msi_node = TAILQ_PREV(mp, mntlist, + mnt_list); } } + lwkt_reltoken(&mountlist_token); + + lwkt_gettoken(&mountlist_token); TAILQ_REMOVE(&mountscan_list, &info, msi_entry); lwkt_reltoken(&mountlist_token); + return(res); } @@ -1129,12 +1161,13 @@ mount_get_by_nc(struct namecache *ncp) { struct mount *mp = NULL; - lwkt_gettoken(&mountlist_token); + lwkt_gettoken_shared(&mountlist_token); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (ncp == mp->mnt_ncmountpt.ncp) break; } lwkt_reltoken(&mountlist_token); + return (mp); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b24dccbd88..6dcf0389de 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -996,11 +996,15 @@ sync_callback(struct mount *mp, void *data __unused) int asyncflag; if ((mp->mnt_flag & MNT_RDONLY) == 0) { + lwkt_gettoken(&mp->mnt_token); asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; + lwkt_reltoken(&mp->mnt_token); vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); + lwkt_gettoken(&mp->mnt_token); mp->mnt_flag |= asyncflag; + lwkt_reltoken(&mp->mnt_token); } return(0); } @@ -4563,7 +4567,7 @@ sys_fhopen(struct fhopen_args *uap) mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) { error = ESTALE; - goto done; + goto done2; } /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); @@ -4644,7 +4648,9 @@ sys_fhopen(struct fhopen_args *uap) * Assert that all regular files must be created with a VM object. */ if (vp->v_type == VREG && vp->v_object == NULL) { - kprintf("fhopen: regular file did not have VM object: %p\n", vp); + kprintf("fhopen: regular file did not " + "have VM object: %p\n", + vp); goto bad_drop; } @@ -4664,7 +4670,8 @@ sys_fhopen(struct fhopen_args *uap) else type = F_WAIT; vn_unlock(vp); - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { + if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, + &lf, type)) != 0) { /* * release our private reference. */ @@ -4687,6 +4694,8 @@ sys_fhopen(struct fhopen_args *uap) fsetfd(fdp, fp, indx); fdrop(fp); uap->sysmsg_result = indx; + mount_drop(mp); + return (error); bad_drop: @@ -4695,6 +4704,8 @@ bad_drop: bad: vput(vp); done: + mount_drop(mp); +done2: return (error); } @@ -4732,6 +4743,9 @@ sys_fhstat(struct fhstat_args *uap) } if (error == 0) error = copyout(&sb, uap->sb, sizeof(sb)); + if (mp) + mount_drop(mp); + return (error); } @@ -4792,6 +4806,9 @@ sys_fhstatfs(struct fhstatfs_args *uap) } error = copyout(sp, uap->buf, sizeof(*sp)); done: + if (mp) + mount_drop(mp); + return (error); } @@ -4842,6 +4859,8 @@ sys_fhstatvfs(struct fhstatvfs_args *uap) sp->f_flag |= ST_NOSUID; error = copyout(sp, uap->buf, sizeof(*sp)); done: + if (mp) + mount_drop(mp); return (error); } diff --git a/sys/vfs/nfs/nfs_serv.c b/sys/vfs/nfs/nfs_serv.c index 1c87c781c2..4e6bac22b4 100644 --- a/sys/vfs/nfs/nfs_serv.c +++ b/sys/vfs/nfs/nfs_serv.c @@ -1606,7 +1606,7 @@ nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct vnode *dirp; struct vnode *dvp; struct vnode *vp; - struct mount *mp; + struct mount *mp = NULL; nfsfh_t nfh; fhandle_t *fhp; u_quad_t tempsize; @@ -1872,6 +1872,8 @@ nfsmout: } if (vp) vput(vp); + if (mp) + mount_drop(mp); return (error); } diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index dfb260fc2a..e7964cd787 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -1220,9 +1220,12 @@ nfsrv_fhtovp(fhandle_t *fhp, int lockflag, if (mp == NULL) return (ESTALE); error = VFS_CHECKEXP(mp, nam, &exflags, &credanon); - if (error) + if (error) { + mount_drop(mp); return (error); + } error = VFS_FHTOVP(mp, NULL, &fhp->fh_fid, vpp); + mount_drop(mp); if (error) return (ESTALE); #ifdef MNT_EXNORESPORT diff --git a/sys/vfs/nullfs/null_vfsops.c b/sys/vfs/nullfs/null_vfsops.c index ee3a5bd6de..96d7db009c 100644 --- a/sys/vfs/nullfs/null_vfsops.c +++ b/sys/vfs/nullfs/null_vfsops.c @@ -174,6 +174,9 @@ nullfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) /* * Try to create a unique but non-random fsid for the nullfs to * allow it to be exported via NFS. + * + * NOTE: This has some cpu overhead when the same filesystem + * is null-mounted many times (e.g. as used by synth). */ bzero(&fh, sizeof(fh)); fh.fh_fsid = rootvp->v_mount->mnt_stat.f_fsid; diff --git a/sys/vfs/ufs/ffs_vfsops.c b/sys/vfs/ufs/ffs_vfsops.c index 86a704f05f..0a13292836 100644 --- a/sys/vfs/ufs/ffs_vfsops.c +++ b/sys/vfs/ufs/ffs_vfsops.c @@ -586,6 +586,7 @@ ffs_reload_scan2(struct mount *mp, struct vnode *vp, void *data) int ffs_mountfs(struct vnode *devvp, struct mount *mp, struct malloc_type *mtype) { + struct mount *mptmp; struct ufsmount *ump; struct buf *bp; struct fs *fs; @@ -718,9 +719,12 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct malloc_type *mtype) mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; - if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || - vfs_getvfs(&mp->mnt_stat.f_fsid)) + if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0) { vfs_getnewfsid(mp); + } else if ((mptmp = vfs_getvfs(&mp->mnt_stat.f_fsid)) != NULL) { + vfs_getnewfsid(mp); + mount_drop(mptmp); + } mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; -- 2.11.4.GIT