From 8f1f6170ad2220eb4fbd447009459d4ea74e5b44 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 8 May 2008 01:26:01 +0000 Subject: [PATCH] Recode the resource limit core (struct plimit) to fix a few races and generally make it work better. Also make changes with an eye towards making it MPSAFE. --- sys/kern/init_main.c | 4 +- sys/kern/kern_acct.c | 4 +- sys/kern/kern_exit.c | 4 +- sys/kern/kern_fork.c | 9 +- sys/kern/kern_plimit.c | 252 ++++++++++++++++++++++++++++++++----------------- sys/sys/resourcevar.h | 17 ++-- 6 files changed, 188 insertions(+), 102 deletions(-) diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 4ed14f28f7..4beadcb5b3 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -40,7 +40,7 @@ * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/init_main.c,v 1.134.2.8 2003/06/06 20:21:32 tegge Exp $ - * $DragonFly: src/sys/kern/init_main.c,v 1.84 2008/01/06 16:55:51 swildner Exp $ + * $DragonFly: src/sys/kern/init_main.c,v 1.85 2008/05/08 01:26:00 dillon Exp $ */ #include "opt_init_path.h" @@ -66,6 +66,7 @@ #include #include #include +#include #include @@ -159,6 +160,7 @@ mi_proc0init(struct globaldata *gd, struct user *proc0paddr) lwkt_init_thread(&thread0, proc0paddr, LWKT_THREAD_STACK, 0, gd); lwkt_set_comm(&thread0, "thread0"); RB_INIT(&proc0.p_lwp_tree); + spin_init(&proc0.p_spin); proc0.p_lasttid = 0; /* +1 = next TID */ lwp_rb_tree_RB_INSERT(&proc0.p_lwp_tree, &lwp0); lwp0.lwp_thread = &thread0; diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index 518ea479c6..4856779567 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -38,7 +38,7 @@ * * @(#)kern_acct.c 8.1 (Berkeley) 6/14/93 * $FreeBSD: src/sys/kern/kern_acct.c,v 1.23.2.1 2002/07/24 18:33:55 johan Exp $ - * $DragonFly: src/sys/kern/kern_acct.c,v 1.27 2007/01/01 22:51:17 corecode Exp $ + * $DragonFly: src/sys/kern/kern_acct.c,v 1.28 2008/05/08 01:26:00 dillon Exp $ */ #include @@ -254,7 +254,7 @@ acct_process(struct proc *p) */ rlim.rlim_cur = RLIM_INFINITY; rlim.rlim_max = RLIM_INFINITY; - plimit_modify(&p->p_limit, RLIMIT_FSIZE, &rlim); + plimit_modify(p, RLIMIT_FSIZE, &rlim); /* * Write the accounting information to the file. diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index f337c19072..156c2881d5 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -37,7 +37,7 @@ * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $ - * $DragonFly: src/sys/kern/kern_exit.c,v 1.89 2008/04/01 18:06:34 nth Exp $ + * $DragonFly: src/sys/kern/kern_exit.c,v 1.90 2008/05/08 01:26:00 dillon Exp $ */ #include "opt_compat.h" @@ -529,7 +529,7 @@ exit1(int rv) * * Other substructures are freed from wait(). */ - plimit_free(&p->p_limit); + plimit_free(p); /* * Release the current user process designation on the process so diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index efb2e314c1..3f7bd6e556 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -37,7 +37,7 @@ * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $ - * $DragonFly: src/sys/kern/kern_fork.c,v 1.74 2008/04/28 07:07:01 dillon Exp $ + * $DragonFly: src/sys/kern/kern_fork.c,v 1.75 2008/05/08 01:26:00 dillon Exp $ */ #include "opt_ktrace.h" @@ -68,6 +68,7 @@ #include #include #include +#include static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback"); @@ -181,6 +182,7 @@ sys_lwp_create(struct lwp_create_args *uap) if (error) goto fail2; + plimit_lwp_fork(p); /* force exclusive access */ lp = lwp_fork(curthread->td_lwp, p, RFPROC); error = cpu_prepare_lwp(lp, ¶ms); if (params.tid1 != NULL && @@ -191,7 +193,7 @@ sys_lwp_create(struct lwp_create_args *uap) goto fail; /* - * Now schedule the new lwp. + * Now schedule the new lwp. */ p->p_usched->resetpriority(lp); crit_enter(); @@ -346,6 +348,7 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) } RB_INIT(&p2->p_lwp_tree); + spin_init(&p2->p_spin); p2->p_lasttid = -1; /* first tid will be 0 */ /* @@ -429,7 +432,7 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) } } p2->p_fdtol = fdtol; - p2->p_limit = plimit_fork(p1->p_limit); + p2->p_limit = plimit_fork(p1); /* * Preserve some more flags in subprocess. P_PROFIL has already diff --git a/sys/kern/kern_plimit.c b/sys/kern/kern_plimit.c index 009e144176..a1d7307a30 100644 --- a/sys/kern/kern_plimit.c +++ b/sys/kern/kern_plimit.c @@ -66,7 +66,7 @@ * * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $ - * $DragonFly: src/sys/kern/kern_plimit.c,v 1.2 2006/09/05 00:55:45 dillon Exp $ + * $DragonFly: src/sys/kern/kern_plimit.c,v 1.3 2008/05/08 01:26:00 dillon Exp $ */ #include @@ -84,8 +84,7 @@ #include -static struct plimit *plimit_copy(struct plimit *olimit); -static void plimit_exclusive(struct plimit **limitp); +static void plimit_copy(struct plimit *olimit, struct plimit *nlimit); /* * Initialize proc0's plimit structure. All later plimit structures @@ -116,114 +115,172 @@ plimit_init0(struct plimit *limit) /* * Return a plimit for use by a new forked process given the one - * contained in the parent process. p_exclusive will be set if - * the parent process has more then one thread, requiring a copy. - * Otherwise we can just inherit a reference. + * contained in the parent process. * * MPSAFE */ struct plimit * -plimit_fork(struct plimit *limit) +plimit_fork(struct proc *p1) { - if (limit->p_exclusive) { - limit = plimit_copy(limit); - } else { - spin_lock_wr(&limit->p_spin); - ++limit->p_refcnt; - spin_unlock_wr(&limit->p_spin); - } - return(limit); -} + struct plimit *olimit = p1->p_limit; + struct plimit *nlimit = NULL; + struct plimit *rlimit; -/* - * This routine is called when the second LWP is created for a process. - * The process's limit structure must be made exclusive and a copy is - * made if necessary. - * - * If the refcnt is 1 only the LWPs associated with the caller's process - * have access to the structure, and since all we do is set the exclusive - * but we don't need a spinlock. - * - * MPSAFE - */ -static -void -plimit_exclusive(struct plimit **limitp) -{ - struct plimit *olimit = *limitp; - struct plimit *nlimit; + /* + * If we are exclusive (but not threaded-exclusive), but have only + * one reference, we can convert the structure to copy-on-write + * again. + * + * If we were threaded but are no longer threaded we can do the same + * thing. + */ + if (olimit->p_exclusive == 1) { + KKASSERT(olimit->p_refcnt == 1); + olimit->p_exclusive = 0; + } else if (olimit->p_exclusive == 2 && p1->p_nthreads == 1) { + KKASSERT(olimit->p_refcnt == 1); + olimit->p_exclusive = 0; + } - if (olimit->p_refcnt == 1) { - olimit->p_exclusive = 1; - } else { - nlimit = plimit_copy(olimit); - nlimit->p_exclusive = 1; - *limitp = nlimit; - spin_lock_wr(&olimit->p_spin); - if (--olimit->p_refcnt == 0) { - spin_unlock_wr(&olimit->p_spin); - kfree(olimit, M_SUBPROC); + /* + * Take a short-cut that requires limited spin locks. If we aren't + * exclusive we will not be threaded and we can just bump the ref + * count. If that is true and we also have only one ref then there + * can be no other accessors. + */ + if (olimit->p_exclusive == 0) { + if (olimit->p_refcnt == 1) { + ++olimit->p_refcnt; } else { + spin_lock_wr(&olimit->p_spin); + ++olimit->p_refcnt; spin_unlock_wr(&olimit->p_spin); } + return(olimit); + } + + /* + * Full-blown code-up. + */ + nlimit = NULL; + spin_lock_wr(&olimit->p_spin); + + for (;;) { + if (olimit->p_exclusive == 0) { + ++olimit->p_refcnt; + rlimit = olimit; + break; + } + if (nlimit) { + plimit_copy(olimit, nlimit); + rlimit = nlimit; + nlimit = NULL; + break; + } + spin_unlock_wr(&olimit->p_spin); + nlimit = kmalloc(sizeof(*nlimit), M_SUBPROC, M_WAITOK); + spin_lock_wr(&olimit->p_spin); } + spin_unlock_wr(&olimit->p_spin); + if (nlimit) + kfree(nlimit, M_SUBPROC); + return(rlimit); } /* - * Return a copy of the passed plimit. The returned copy will have a refcnt - * of 1 and p_exclusive will be cleared. + * This routine is called when a new LWP is created for a process. We + * must force exclusivity (=2) so p->p_limit remains stable. * - * MPSAFE + * LWPs share the same process structure so this does not bump refcnt. */ -static -struct plimit * -plimit_copy(struct plimit *olimit) +void +plimit_lwp_fork(struct proc *p) { - struct plimit *nlimit; - - nlimit = kmalloc(sizeof(struct plimit), M_SUBPROC, M_WAITOK); - - spin_lock_rd(&olimit->p_spin); - *nlimit = *olimit; - spin_unlock_rd(&olimit->p_spin); + struct plimit *olimit; - spin_init(&nlimit->p_spin); - nlimit->p_refcnt = 1; - nlimit->p_exclusive = 0; - return (nlimit); + for (;;) { + olimit = p->p_limit; + if (olimit->p_exclusive == 2) { + KKASSERT(olimit->p_refcnt == 1); + break; + } + if (olimit->p_refcnt == 1) { + olimit->p_exclusive = 2; + break; + } + plimit_modify(p, -1, NULL); + } } /* * This routine is called to fixup a proces's p_limit structure prior - * to it being modified. If index >= 0 the specified modified is also + * to it being modified. If index >= 0 the specified modification is also * made. * - * A limit structure is potentially shared only if the process has exactly - * one LWP, otherwise it is guarenteed to be exclusive and no copy needs - * to be made. This means that we can safely replace *limitp in the copy - * case. + * This routine must make the limit structure exclusive. A later fork + * will convert it back to copy-on-write if possible. * - * We call plimit_exclusive() to do all the hard work, but the result does - * not actually have to be exclusive since the original was not, so just - * clear p_exclusive afterwords. + * We can count on p->p_limit being stable since if we had created any + * threads it will have already been made exclusive (=2). * * MPSAFE */ void -plimit_modify(struct plimit **limitp, int index, struct rlimit *rlim) +plimit_modify(struct proc *p, int index, struct rlimit *rlim) { - struct plimit *limit = *limitp; + struct plimit *olimit; + struct plimit *nlimit; + struct plimit *rlimit; - if (limit->p_exclusive == 0 && limit->p_refcnt > 1) { - plimit_exclusive(limitp); - limit = *limitp; - limit->p_exclusive = 0; + /* + * Shortcut. If we are not threaded we may be able to trivially + * set the structure to exclusive access without needing to acquire + * any spinlocks. The p_limit structure will be stable. + */ + olimit = p->p_limit; + if (p->p_nthreads == 1) { + if (olimit->p_exclusive == 0 && olimit->p_refcnt == 1) + olimit->p_exclusive = 1; + if (olimit->p_exclusive) { + if (index >= 0) + p->p_limit->pl_rlimit[index] = *rlim; + return; + } } - if (index >= 0) { - spin_lock_wr(&limit->p_spin); - limit->pl_rlimit[index] = *rlim; - spin_unlock_wr(&limit->p_spin); + + /* + * Full-blown code-up. Make a copy if we aren't exclusive. If + * we have only one ref we can safely convert the structure to + * exclusive without copying. + */ + nlimit = NULL; + spin_lock_wr(&olimit->p_spin); + + for (;;) { + if (olimit->p_refcnt == 1) { + if (olimit->p_exclusive == 0) + olimit->p_exclusive = 1; + rlimit = olimit; + break; + } + KKASSERT(olimit->p_exclusive == 0); + if (nlimit) { + plimit_copy(olimit, nlimit); + nlimit->p_exclusive = 1; + p->p_limit = nlimit; + rlimit = nlimit; + nlimit = NULL; + break; + } + spin_unlock_wr(&olimit->p_spin); + nlimit = kmalloc(sizeof(*nlimit), M_SUBPROC, M_WAITOK); + spin_lock_wr(&olimit->p_spin); } + if (index >= 0) + rlimit->pl_rlimit[index] = *rlim; + spin_unlock_wr(&olimit->p_spin); + if (nlimit) + kfree(nlimit, M_SUBPROC); } /* @@ -232,19 +289,24 @@ plimit_modify(struct plimit **limitp, int index, struct rlimit *rlim) * MPSAFE */ void -plimit_free(struct plimit **limitp) +plimit_free(struct proc *p) { struct plimit *limit; - if ((limit = *limitp) != NULL) { - *limitp = NULL; + if ((limit = p->p_limit) != NULL) { + p->p_limit = NULL; - spin_lock_wr(&limit->p_spin); - if (--limit->p_refcnt == 0) { - spin_unlock_wr(&limit->p_spin); + if (limit->p_refcnt == 1) { + limit->p_refcnt = -999; kfree(limit, M_SUBPROC); } else { - spin_unlock_wr(&limit->p_spin); + spin_lock_wr(&limit->p_spin); + if (--limit->p_refcnt == 0) { + spin_unlock_wr(&limit->p_spin); + kfree(limit, M_SUBPROC); + } else { + spin_unlock_wr(&limit->p_spin); + } } } } @@ -268,7 +330,7 @@ kern_setrlimit(u_int which, struct rlimit *limp) /* * We will be modifying a resource, make a copy if necessary. */ - plimit_modify(&p->p_limit, -1, NULL); + plimit_modify(p, -1, NULL); limit = p->p_limit; alimp = &limit->pl_rlimit[which]; @@ -422,3 +484,21 @@ plimit_testcpulimit(struct plimit *limit, u_int64_t ttime) return(mode); } +/* + * Helper routine to copy olimit to nlimit and initialize nlimit for + * use. nlimit's reference count will be set to 1 and its exclusive bit + * will be cleared. + * + * MPSAFE + */ +static +void +plimit_copy(struct plimit *olimit, struct plimit *nlimit) +{ + *nlimit = *olimit; + + spin_init(&nlimit->p_spin); + nlimit->p_refcnt = 1; + nlimit->p_exclusive = 0; +} + diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index 3210609058..d411a13338 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -32,7 +32,7 @@ * * @(#)resourcevar.h 8.4 (Berkeley) 1/9/95 * $FreeBSD: src/sys/sys/resourcevar.h,v 1.16.2.1 2000/09/07 19:13:55 truckman Exp $ - * $DragonFly: src/sys/sys/resourcevar.h,v 1.15 2007/01/06 03:23:20 dillon Exp $ + * $DragonFly: src/sys/sys/resourcevar.h,v 1.16 2008/05/08 01:26:01 dillon Exp $ */ #ifndef _SYS_RESOURCEVAR_H_ @@ -69,10 +69,10 @@ struct uprof { /* profile arguments */ /* * Kernel shareable process resource limits. Because this structure * is moderately large but changes infrequently, it is normally - * shared copy-on-write after forks. If a group of processes - * ("threads") share modifications, the PL_SHAREMOD flag is set, - * and a copy must be made for the child of a new fork that isn't - * sharing modifications to the limits. + * shared copy-on-write after forks. + * + * Threaded programs force p_exclusive (set it to 2) to prevent the + * proc->p_limit pointer from changing out from under threaded access. */ struct plimit { struct rlimit pl_rlimit[RLIM_NLIMITS]; @@ -118,10 +118,11 @@ void uireplace (struct uidinfo **puip, struct uidinfo *nuip); void uihashinit (void); void plimit_init0(struct plimit *); -struct plimit *plimit_fork(struct plimit *); +struct plimit *plimit_fork(struct proc *); +void plimit_lwp_fork(struct proc *); int plimit_testcpulimit(struct plimit *, u_int64_t); -void plimit_modify(struct plimit **, int, struct rlimit *); -void plimit_free(struct plimit **); +void plimit_modify(struct proc *, int, struct rlimit *); +void plimit_free(struct proc *); #endif -- 2.11.4.GIT