From e7e1189fb188fbaff70fbfd50b4ecde079c824e6 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 5 Jul 2017 09:07:06 -0700 Subject: [PATCH] kernel - Automatically downscale NPROC resource limit * Downscale the NPROC resource limit based on fork and chroot depth, up to 50%, and also make the limit apply to root processes. This is intended to be a poor-man's safety, preventing run-away (root or other) process creation from completely imploding a system. * Each level of fork() downscales the NPROC resource limit by 1/3%, capped at 32 levels (~10%). * Each chroot (including that made by a jail) downscales the NPROC resource limit by 10%, up to 40%. --- sys/kern/kern_fork.c | 23 +++++++++++++++++------ sys/kern/kern_plimit.c | 32 ++++++++++++++++++++++++++++++++ sys/kern/vfs_syscalls.c | 7 +++++++ sys/sys/proc.h | 3 ++- sys/sys/resourcevar.h | 1 + 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 949b870341..6c3c241c08 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -384,19 +384,24 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) atomic_add_int(&nprocs, 1); /* - * Increment the count of procs running with this uid. Don't allow - * a nonprivileged user to exceed their current limit. + * Increment the count of procs running with this uid. This also + * applies to root. */ ok = chgproccnt(lp1->lwp_thread->td_ucred->cr_ruidinfo, 1, - (uid != 0) ? 
p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0); + plimit_getadjvalue(RLIMIT_NPROC)); if (!ok) { /* * Back out the process count */ atomic_add_int(&nprocs, -1); - if (ppsratecheck(&lastfail, &curfail, 1)) - kprintf("maxproc limit exceeded by uid %d, please " - "see tuning(7) and login.conf(5).\n", uid); + if (ppsratecheck(&lastfail, &curfail, 1)) { + kprintf("maxproc limit of %jd " + "exceeded by \"%s\" uid %d, " + "please see tuning(7) and login.conf(5).\n", + plimit_getadjvalue(RLIMIT_NPROC), + p1->p_comm, + uid); + } tsleep(&forksleep, 0, "fork", hz / 2); error = EAGAIN; goto done; @@ -541,6 +546,12 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) p2->p_limit = plimit_fork(p1); /* + * Adjust depth for resource downscaling + */ + if ((p2->p_depth & 31) != 31) + ++p2->p_depth; + + /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ diff --git a/sys/kern/kern_plimit.c b/sys/kern/kern_plimit.c index f5d38a5f33..696c77f31e 100644 --- a/sys/kern/kern_plimit.c +++ b/sys/kern/kern_plimit.c @@ -519,3 +519,35 @@ plimit_copy(struct plimit *olimit, struct plimit *nlimit) nlimit->p_exclusive = 0; } +/* + * This routine returns the value of a resource, downscaled based on + * the process's fork depth and chroot depth (up to 50%). This mechanism + * is designed to prevent run-aways from blowing up unrelated processes + * running under the same UID. + * + * NOTE: Currently only applicable to RLIMIT_NPROC. We could also limit + * file descriptors but we shouldn't have to as these are allocated + * dynamically. + */ +u_int64_t +plimit_getadjvalue(int i) +{ + struct proc *p = curproc; + struct plimit *limit; + uint64_t v; + uint32_t depth; + + limit = p->p_limit; + v = limit->pl_rlimit[i].rlim_cur; + if (i == RLIMIT_NPROC) { + /* + * 10% per chroot (around 1/3% per fork depth), with a + * maximum of 50% downscaling of the resource limit. 
+ */ + depth = p->p_depth; + if (depth > 32 * 5) + depth = 32 * 5; + v -= v * depth / 320; + } + return v; +} diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c5c50bd607..787bec0f1d 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1836,6 +1836,7 @@ kern_chroot(struct nchandle *nch) error = checkvp_chdir(vp, td); vn_unlock(vp); /* leave reference intact */ if (error == 0) { + lwkt_gettoken(&p->p_token); vrele(fdp->fd_rdir); fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ cache_drop(&fdp->fd_nrdir); @@ -1845,6 +1846,12 @@ kern_chroot(struct nchandle *nch) vref(fdp->fd_jdir); cache_copy(nch, &fdp->fd_njdir); } + if ((p->p_flags & P_DIDCHROOT) == 0) { + p->p_flags |= P_DIDCHROOT; + if (p->p_depth <= 65535 - 32) + p->p_depth += 32; + } + lwkt_reltoken(&p->p_token); } else { vrele(vp); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index a1b85c6195..71a37010e7 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -310,6 +310,7 @@ struct proc { struct rtprio p_rtprio; /* Realtime priority. */ struct pargs *p_args; u_short p_xstat; /* Exit status or last stop signal */ + u_short p_depth; /* Used to downscale resource limits */ int p_ionice; void *p_unused02; @@ -384,7 +385,7 @@ struct proc { #define P_SIGVTALRM 0x02000000 /* signal SIGVTALRM pending due to itimer */ #define P_SIGPROF 0x04000000 /* signal SIGPROF pending due to itimer */ #define P_INEXEC 0x08000000 /* Process is in execve(). 
*/ -#define P_UNUSED28 0x10000000 +#define P_DIDCHROOT 0x10000000 /* Did at least one chroot */ #define P_UNUSED29 0x20000000 #define P_XCPU 0x40000000 /* SIGXCPU */ diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index c7b9fd0471..6b9b89e327 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -133,6 +133,7 @@ void uihashinit (void); void plimit_init0(struct plimit *); struct plimit *plimit_fork(struct proc *); +u_int64_t plimit_getadjvalue(int i); void plimit_lwp_fork(struct proc *); int plimit_testcpulimit(struct plimit *, u_int64_t); void plimit_modify(struct proc *, int, struct rlimit *); -- 2.11.4.GIT