From 0a4a9c77c8050921de07a2085cd6de5fe83db343 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 2 Jul 2009 12:02:17 -0700 Subject: [PATCH] file descriptor enumeration - Fix races Add a soft refrence count, fd_softrefs, to struct filedesc. This is used to prevent the filedesc from being ripped out from under a file descriptor scan via the allproc scan, which is used to enumerate file descriptors and also used by the revoke code. Reported-by: hasso --- sys/kern/kern_descrip.c | 63 +++++++++++++++++++++++++++++++++++++++++++------ sys/kern/kern_exec.c | 3 +-- sys/kern/kern_exit.c | 3 +-- sys/kern/kern_fork.c | 6 ++--- sys/kern/vfs_aio.c | 3 +-- sys/sys/filedesc.h | 3 ++- 6 files changed, 63 insertions(+), 18 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 6c500fe83f..e7bcc96e59 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1319,10 +1319,19 @@ fdrevoke_proc_callback(struct proc *p, void *vinfo) } /* - * Locate and close any matching file descriptors. + * Softref the fdp to prevent it from being destroyed */ - if ((fdp = p->p_fd) == NULL) + spin_lock_wr(&p->p_spin); + if ((fdp = p->p_fd) == NULL) { + spin_unlock_wr(&p->p_spin); return(0); + } + atomic_add_int(&fdp->fd_softrefs, 1); + spin_unlock_wr(&p->p_spin); + + /* + * Locate and close any matching file descriptors. + */ spin_lock_wr(&fdp->fd_spin); for (n = 0; n < fdp->fd_nfiles; ++n) { if ((fp = fdp->fd_files[n].fp) == NULL) @@ -1337,6 +1346,7 @@ fdrevoke_proc_callback(struct proc *p, void *vinfo) } } spin_unlock_wr(&fdp->fd_spin); + atomic_subtract_int(&fdp->fd_softrefs, 1); return(0); } @@ -1789,9 +1799,9 @@ again: * NOT MPSAFE (MPSAFE for refs > 1, but the final cleanup code is not MPSAFE) */ void -fdfree(struct proc *p) +fdfree(struct proc *p, struct filedesc *repl) { - struct filedesc *fdp = p->p_fd; + struct filedesc *fdp; struct fdnode *fdnode; int i; struct filedesc_to_leader *fdtol; @@ -1799,12 +1809,22 @@ fdfree(struct proc *p) struct vnode *vp; struct flock lf; - /* Certain daemons might not have file descriptors. */ + /* + * Interlock against an allproc scan operations (typically frevoke). + */ + spin_lock_wr(&p->p_spin); + fdp = p->p_fd; + p->p_fd = repl; + spin_unlock_wr(&p->p_spin); + + /* + * Certain daemons might not have file descriptors. + */ if (fdp == NULL) return; /* - * Severe messing around to follow + * Severe messing around to follow. */ spin_lock_wr(&fdp->fd_spin); @@ -1885,6 +1905,20 @@ fdfree(struct proc *p) spin_unlock_wr(&fdp->fd_spin); /* + * Wait for any softrefs to go away. This race rarely occurs so + * we can use a non-critical-path style poll/sleep loop. The + * race only occurs against allproc scans. + * + * No new softrefs can occur with the fdp disconnected from the + * process. + */ + if (fdp->fd_softrefs) { + kprintf("pid %d: Warning, fdp race avoided\n", p->p_pid); + while (fdp->fd_softrefs) + tsleep(&fdp->fd_softrefs, 0, "fdsoft", 1); + } + + /* * we are the last reference to the structure, we can * safely assume it will not change out from under us. */ @@ -2560,8 +2594,22 @@ sysctl_kern_file_callback(struct proc *p, void *data) return(0); if (!PRISON_CHECK(info->req->td->td_proc->p_ucred, p->p_ucred) != 0) return(0); - if ((fdp = p->p_fd) == NULL) + + /* + * Softref the fdp to prevent it from being destroyed + */ + spin_lock_wr(&p->p_spin); + if ((fdp = p->p_fd) == NULL) { + spin_unlock_wr(&p->p_spin); return(0); + } + atomic_add_int(&fdp->fd_softrefs, 1); + spin_unlock_wr(&p->p_spin); + + /* + * The fdp's own spinlock prevents the contents from being + * modified. + */ spin_lock_rd(&fdp->fd_spin); for (n = 0; n < fdp->fd_nfiles; ++n) { if ((fp = fdp->fd_files[n].fp) == NULL) @@ -2579,6 +2627,7 @@ sysctl_kern_file_callback(struct proc *p, void *data) } } spin_unlock_rd(&fdp->fd_spin); + atomic_subtract_int(&fdp->fd_softrefs, 1); if (info->error) return(-1); return(0); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 3092ced8ac..a3cdcfc4f1 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -329,8 +329,7 @@ interpret: struct filedesc *tmp; tmp = fdcopy(p); - fdfree(p); - p->p_fd = tmp; + fdfree(p, tmp); } /* diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 0975726d76..8d75ace0a9 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -347,8 +347,7 @@ exit1(int rv) * Close open files and release open-file table. * This may block! */ - fdfree(p); - p->p_fd = NULL; + fdfree(p, NULL); if(p->p_leader->p_peers) { q = p->p_leader; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index bd7fb5c734..405128cd49 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -252,8 +252,7 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(p1); - fdfree(p1); - p1->p_fd = fdtmp; + fdfree(p1, fdtmp); } /* @@ -263,8 +262,7 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) if (p1->p_fd->fd_refcnt > 1) { struct filedesc *newfd; newfd = fdcopy(p1); - fdfree(p1); - p1->p_fd = newfd; + fdfree(p1, newfd); } } *procp = NULL; diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 851e7d38a6..48a2a6f0ba 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -659,8 +659,7 @@ aio_daemon(void *uproc, struct trapframe *frame) * filedescriptors, except as temporarily inherited from the client. * Credentials are also cloned, and made equivalent to "root". */ - fdfree(mycp); - mycp->p_fd = NULL; + fdfree(mycp, NULL); cr = cratom(&mycp->p_ucred); cr->cr_uid = 0; uireplace(&cr->cr_uidinfo, uifind(0)); diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index c3deef5f3d..764c273398 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -91,6 +91,7 @@ struct filedesc { int fd_freefile; /* approx. next free file */ int fd_cmask; /* mask for file creation */ int fd_refcnt; /* reference count */ + int fd_softrefs; /* softrefs to prevent destruction */ int fd_knlistsize; /* size of knlist */ struct klist *fd_knlist; /* list of attached knotes */ @@ -169,7 +170,7 @@ void fdinit_bootstrap(struct proc *p0, struct filedesc *fdp0, int cmask); struct filedesc *fdinit (struct proc *p); struct filedesc *fdshare (struct proc *p); struct filedesc *fdcopy (struct proc *p); -void fdfree (struct proc *p); +void fdfree (struct proc *p, struct filedesc *repl); int fdrevoke(void *f_data, short f_type, struct ucred *cred); int closef (struct file *fp, struct proc *p); void fdcloseexec (struct proc *p); -- 2.11.4.GIT