From 5fd5c68917c02ae3dd752fac0ce9f7a10d02e94f Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Mon, 6 Aug 2012 14:48:25 +0000 Subject: [PATCH] 5419 hung mount in one zone shouldn't interfere with zone boot/halt of another zone Reviewed by: Robert Mustacchi Reviewed by: Garrett D'Amore Approved by: Dan McDonald --- usr/src/uts/common/fs/vfs.c | 24 +++++++- usr/src/uts/common/os/zone.c | 127 ++++++++++++++++++++++-------------------- usr/src/uts/common/sys/zone.h | 12 +++- 3 files changed, 99 insertions(+), 64 deletions(-) diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c index 409e69a00d..6e93d056df 100644 --- a/usr/src/uts/common/fs/vfs.c +++ b/usr/src/uts/common/fs/vfs.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -1129,6 +1130,7 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, struct pathname pn, rpn; vsk_anchor_t *vskap; char fstname[FSTYPSZ]; + zone_t *zone; /* * The v_flag value for the mount point vp is permanently set @@ -1590,9 +1592,24 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, } /* - * Serialize with zone creations. + * Serialize with zone state transitions. + * See vfs_list_add; zone mounted into is: + * zone_find_by_path(refstr_value(vfsp->vfs_mntpt)) + * not the zone doing the mount (curproc->p_zone), but if we're already + * inside a NGZ, then we know what zone we are. */ - mount_in_progress(); + if (INGLOBALZONE(curproc)) { + zone = zone_find_by_path(mountpt); + ASSERT(zone != NULL); + } else { + zone = curproc->p_zone; + /* + * zone_find_by_path does a hold, so do one here too so that + * we can do a zone_rele after mount_completed. + */ + zone_hold(zone); + } + mount_in_progress(zone); /* * Instantiate (or reinstantiate) the file system. If appropriate, * splice it into the file system name space. @@ -1761,7 +1778,8 @@ domount(char *fsname, struct mounta *uap, vnode_t *vp, struct cred *credp, vfs_unlock(vfsp); } - mount_completed(); + mount_completed(zone); + zone_rele(zone); if (splice) vn_vfsunlock(vp); diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 5d146c03b5..56c654331e 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -378,13 +378,6 @@ rctl_hndl_t rc_zone_shmmax; rctl_hndl_t rc_zone_shmmni; rctl_hndl_t rc_zone_semmni; rctl_hndl_t rc_zone_msgmni; -/* - * Synchronization primitives used to synchronize between mounts and zone - * creation/destruction. - */ -static int mounts_in_progress; -static kcondvar_t mount_cv; -static kmutex_t mount_lock; const char * const zone_default_initname = "/sbin/init"; static char * const zone_prefix = "/zone/"; @@ -430,17 +423,20 @@ static const int ZONE_SYSCALL_API_VERSION = 6; /* * Certain filesystems (such as NFS and autofs) need to know which zone * the mount is being placed in. Because of this, we need to be able to - * ensure that a zone isn't in the process of being created such that - * nfs_mount() thinks it is in the global zone, while by the time it - * gets added the list of mounted zones, it ends up on zoneA's mount - * list. + * ensure that a zone isn't in the process of being created/destroyed such + * that nfs_mount() thinks it is in the global/NGZ zone, while by the time + * it gets added the list of mounted zones, it ends up on the wrong zone's + * mount list. Since a zone can't reside on an NFS file system, we don't + * have to worry about the zonepath itself. * * The following functions: block_mounts()/resume_mounts() and * mount_in_progress()/mount_completed() are used by zones and the VFS - * layer (respectively) to synchronize zone creation and new mounts. + * layer (respectively) to synchronize zone state transitions and new + * mounts within a zone. This syncronization is on a per-zone basis, so + * activity for one zone will not interfere with activity for another zone. * * The semantics are like a reader-reader lock such that there may - * either be multiple mounts (or zone creations, if that weren't + * either be multiple mounts (or zone state transitions, if that weren't * serialized by zonehash_lock) in progress at the same time, but not * both. * @@ -448,10 +444,8 @@ static const int ZONE_SYSCALL_API_VERSION = 6; * taking too long. * * The semantics are such that there is unfair bias towards the - * "current" operation. This means that zone creations may starve if - * there is a rapid succession of new mounts coming in to the system, or - * there is a remote possibility that zones will be created at such a - * rate that new mounts will not be able to proceed. + * "current" operation. This means that zone halt may starve if + * there is a rapid succession of new mounts coming in to the zone. */ /* * Prevent new mounts from progressing to the point of calling @@ -459,7 +453,7 @@ static const int ZONE_SYSCALL_API_VERSION = 6; * them to complete. */ static int -block_mounts(void) +block_mounts(zone_t *zp) { int retval = 0; @@ -468,19 +462,21 @@ block_mounts(void) * called with zonehash_lock held. */ ASSERT(MUTEX_NOT_HELD(&zonehash_lock)); - mutex_enter(&mount_lock); - while (mounts_in_progress > 0) { - if (cv_wait_sig(&mount_cv, &mount_lock) == 0) + mutex_enter(&zp->zone_mount_lock); + while (zp->zone_mounts_in_progress > 0) { + if (cv_wait_sig(&zp->zone_mount_cv, &zp->zone_mount_lock) == 0) goto signaled; } /* * A negative value of mounts_in_progress indicates that mounts - * have been blocked by (-mounts_in_progress) different callers. + * have been blocked by (-mounts_in_progress) different callers + * (remotely possible if two threads enter zone_shutdown at the same + * time). */ - mounts_in_progress--; + zp->zone_mounts_in_progress--; retval = 1; signaled: - mutex_exit(&mount_lock); + mutex_exit(&zp->zone_mount_lock); return (retval); } @@ -489,26 +485,26 @@ signaled: * Allow them to progress if we were the last obstacle. */ static void -resume_mounts(void) +resume_mounts(zone_t *zp) { - mutex_enter(&mount_lock); - if (++mounts_in_progress == 0) - cv_broadcast(&mount_cv); - mutex_exit(&mount_lock); + mutex_enter(&zp->zone_mount_lock); + if (++zp->zone_mounts_in_progress == 0) + cv_broadcast(&zp->zone_mount_cv); + mutex_exit(&zp->zone_mount_lock); } /* - * The VFS layer is busy with a mount; zones should wait until all - * mounts are completed to progress. + * The VFS layer is busy with a mount; this zone should wait until all + * of its mounts are completed to progress. */ void -mount_in_progress(void) +mount_in_progress(zone_t *zp) { - mutex_enter(&mount_lock); - while (mounts_in_progress < 0) - cv_wait(&mount_cv, &mount_lock); - mounts_in_progress++; - mutex_exit(&mount_lock); + mutex_enter(&zp->zone_mount_lock); + while (zp->zone_mounts_in_progress < 0) + cv_wait(&zp->zone_mount_cv, &zp->zone_mount_lock); + zp->zone_mounts_in_progress++; + mutex_exit(&zp->zone_mount_lock); } /* @@ -516,12 +512,12 @@ mount_in_progress(void) * callers if this is the last mount. */ void -mount_completed(void) +mount_completed(zone_t *zp) { - mutex_enter(&mount_lock); - if (--mounts_in_progress == 0) - cv_broadcast(&mount_cv); - mutex_exit(&mount_lock); + mutex_enter(&zp->zone_mount_lock); + if (--zp->zone_mounts_in_progress == 0) + cv_broadcast(&zp->zone_mount_cv); + mutex_exit(&zp->zone_mount_lock); } /* @@ -4410,7 +4406,7 @@ zone_create(const char *zone_name, const char *zone_root, return (zone_create_error(error, 0, extended_error)); } - if (block_mounts() == 0) { + if (block_mounts(zone) == 0) { mutex_enter(&pp->p_lock); if (curthread != pp->p_agenttp) continuelwps(pp); @@ -4561,7 +4557,7 @@ zone_create(const char *zone_name, const char *zone_root, /* * The zone is fully visible, so we can let mounts progress. */ - resume_mounts(); + resume_mounts(zone); if (rctls) nvlist_free(rctls); @@ -4577,7 +4573,7 @@ errout: continuelwps(pp); mutex_exit(&pp->p_lock); - resume_mounts(); + resume_mounts(zone); if (rctls) nvlist_free(rctls); /* @@ -4732,15 +4728,6 @@ zone_shutdown(zoneid_t zoneid) if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID) return (set_errno(EINVAL)); - /* - * Block mounts so that VFS_MOUNT() can get an accurate view of - * the zone's status with regards to ZONE_IS_SHUTTING down. - * - * e.g. NFS can fail the mount if it determines that the zone - * has already begun the shutdown sequence. - */ - if (block_mounts() == 0) - return (set_errno(EINTR)); mutex_enter(&zonehash_lock); /* * Look for zone under hash lock to prevent races with other @@ -4748,9 +4735,30 @@ zone_shutdown(zoneid_t zoneid) */ if ((zone = zone_find_all_by_id(zoneid)) == NULL) { mutex_exit(&zonehash_lock); - resume_mounts(); return (set_errno(EINVAL)); } + + /* + * We have to drop zonehash_lock before calling block_mounts. + * Hold the zone so we can continue to use the zone_t. + */ + zone_hold(zone); + mutex_exit(&zonehash_lock); + + /* + * Block mounts so that VFS_MOUNT() can get an accurate view of + * the zone's status with regards to ZONE_IS_SHUTTING down. + * + * e.g. NFS can fail the mount if it determines that the zone + * has already begun the shutdown sequence. + * + */ + if (block_mounts(zone) == 0) { + zone_rele(zone); + return (set_errno(EINTR)); + } + + mutex_enter(&zonehash_lock); mutex_enter(&zone_status_lock); status = zone_status_get(zone); /* @@ -4759,7 +4767,8 @@ zone_shutdown(zoneid_t zoneid) if (status < ZONE_IS_READY) { mutex_exit(&zone_status_lock); mutex_exit(&zonehash_lock); - resume_mounts(); + resume_mounts(zone); + zone_rele(zone); return (set_errno(EINVAL)); } /* @@ -4769,7 +4778,8 @@ zone_shutdown(zoneid_t zoneid) if (status >= ZONE_IS_DOWN) { mutex_exit(&zone_status_lock); mutex_exit(&zonehash_lock); - resume_mounts(); + resume_mounts(zone); + zone_rele(zone); return (0); } /* @@ -4804,10 +4814,9 @@ zone_shutdown(zoneid_t zoneid) } } } - zone_hold(zone); /* so we can use the zone_t later */ mutex_exit(&zone_status_lock); mutex_exit(&zonehash_lock); - resume_mounts(); + resume_mounts(zone); if (error = zone_empty(zone)) { zone_rele(zone); diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 7e567c0cdb..7aab7c8d32 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -600,6 +600,14 @@ typedef struct zone { * DTrace-private per-zone state */ int zone_dtrace_getf; /* # of unprivileged getf()s */ + + /* + * Synchronization primitives used to synchronize between mounts and + * zone creation/destruction. + */ + int zone_mounts_in_progress; + kcondvar_t zone_mount_cv; + kmutex_t zone_mount_lock; } zone_t; /* @@ -819,8 +827,8 @@ extern int zone_dataset_visible(const char *, int *); extern int zone_kadmin(int, int, const char *, cred_t *); extern void zone_shutdown_global(void); -extern void mount_in_progress(void); -extern void mount_completed(void); +extern void mount_in_progress(zone_t *); +extern void mount_completed(zone_t *); extern int zone_walk(int (*)(zone_t *, void *), void *); -- 2.11.4.GIT