From 8cdef6cbcc73174e567af3341631ec8fa492796a Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Mon, 5 Dec 2016 16:49:04 -0800
Subject: [PATCH] kernel - Increase worst-case maximum exec rate

* The pid reuse algorithm limits the maximum fork rate. This limit was
  set too low. Increase the limit from 10,000/sec to 100,000/sec.
  Currently our opteron maxes out at around 43,000/sec
  (vfork/exec/wait3/exit of a small static binary).

  Note that with 999999 pids and a 10-second mandatory reuse time floor
  there isn't much of a point increasing the limit beyond 100,000/sec.

* The domain reuse array was increased to 1MB to accommodate this
  change. In addition, update the array in a cache-friendly manner.

* Modify test/sysperf/exec1 to take an nprocesses argument for the
  timing run.
---
 sys/kern/kern_proc.c | 29 ++++++++++++++++++++++-------
 test/sysperf/exec1.c | 35 ++++++++++++++++++++++++++++-------
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 66886a6a8f..bbd00188ef 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -74,7 +74,9 @@
  */
 #define PIDDOM_COUNT	10	/* 10 pids per domain - reduce array size */
 #define PIDDOM_DELAY	10	/* min 10 seconds after exit before reuse */
-#define PIDSEL_DOMAINS	(PID_MAX / PIDDOM_COUNT / ALLPROC_HSIZE * ALLPROC_HSIZE)
+#define PIDDOM_SCALE	10	/* (10,000*SCALE)/sec performance guarantee */
+#define PIDSEL_DOMAINS	(PID_MAX * PIDDOM_SCALE / PIDDOM_COUNT / \
+			 ALLPROC_HSIZE * ALLPROC_HSIZE)
 
 /* Used by libkvm */
 int allproc_hsize = ALLPROC_HSIZE;
@@ -120,11 +122,14 @@ static procglob_t procglob[ALLPROC_HSIZE];
  * using that to skip-over the domain on-allocate.
  *
  * This array has to be fairly large to support a high fork/exec rate.
- * We want ~100,000 entries or so to support a 10-second reuse latency
- * at 10,000 execs/second, worst case. Best-case multiply by PIDDOM_COUNT
+ * A ~100,000 entry array will support a 10-second reuse latency at
+ * 10,000 execs/second, worst case. Best-case multiply by PIDDOM_COUNT
  * (approximately 100,000 execs/second).
+ *
+ * Currently we allocate around a megabyte, making the worst-case fork
+ * rate around 100,000/second.
  */
-static uint8_t pid_doms[PIDSEL_DOMAINS];	/* ~100,000 entries */
+static uint8_t *pid_doms;
 
 /*
  * Random component to nextpid generation. We mix in a random factor to make
@@ -173,6 +178,13 @@ procinit(void)
 	u_long i;
 
 	/*
+	 * Allocate dynamically. This array can be large (~1MB) so don't
+	 * waste boot loader space.
+	 */
+	pid_doms = kmalloc(sizeof(pid_doms[0]) * PIDSEL_DOMAINS,
+			   M_PROC, M_WAITOK | M_ZERO);
+
+	/*
 	 * Avoid unnecessary stalls due to pid_doms[] values all being
 	 * the same. Make sure that the allocation of pid 1 and pid 2
 	 * succeeds.
@@ -589,7 +601,8 @@ pgrel(struct pgrp *pgrp)
 		 * Successful 1->0 transition, pghash_spin is held.
 		 */
 		LIST_REMOVE(pgrp, pg_list);
-		pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] = (uint8_t)time_second;
+		if (pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] != (uint8_t)time_second)
+			pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] = (uint8_t)time_second;
 
 		/*
 		 * Reset any sigio structures pointing to us as a result of
@@ -843,7 +856,8 @@ sess_rele(struct session *sess)
 		 * Successful 1->0 transition and tty_token is held.
 		 */
 		LIST_REMOVE(sess, s_list);
-		pid_doms[sess->s_sid % PIDSEL_DOMAINS] = (uint8_t)time_second;
+		if (pid_doms[sess->s_sid % PIDSEL_DOMAINS] != (uint8_t)time_second)
+			pid_doms[sess->s_sid % PIDSEL_DOMAINS] = (uint8_t)time_second;
 
 		if (sess->s_ttyp && sess->s_ttyp->t_session) {
 #ifdef TTY_DO_FULL_CLOSE
@@ -1135,7 +1149,8 @@ proc_remove_zombie(struct proc *p)
 	LIST_REMOVE(p, p_list);		/* from remove master list */
 	LIST_REMOVE(p, p_sibling);	/* and from sibling list */
 	p->p_pptr = NULL;
-	pid_doms[p->p_pid % PIDSEL_DOMAINS] = (uint8_t)time_second;
+	if (pid_doms[p->p_pid % PIDSEL_DOMAINS] != (uint8_t)time_second)
+		pid_doms[p->p_pid % PIDSEL_DOMAINS] = (uint8_t)time_second;
 	lwkt_reltoken(&prg->proc_token);
 }
 
diff --git a/test/sysperf/exec1.c b/test/sysperf/exec1.c
index 3a4c74a265..5c44fb5ebc 100644
--- a/test/sysperf/exec1.c
+++ b/test/sysperf/exec1.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 
 char *Av0;
 
@@ -24,7 +25,6 @@ execltest(void)
 
 	++elm;
 	if ((pid = vfork()) == 0) {
-		setpriority(PRIO_PROCESS, getpid(), -20);
 		execl(Av0, elm, "dummy", NULL);
 		_exit(1);
 	} else if (pid < 0) {
@@ -33,7 +33,7 @@
 	} else {
 		int status;
 
-		while(waitpid(pid, &status, 0) != pid)
+		while (waitpid(pid, &status, 0) != pid)
 			;
 		if (WEXITSTATUS(status)) {
 			fprintf(stderr, "execl in child failed\n");
@@ -47,10 +47,19 @@ main(int ac, char **av)
 {
 	int i;
 	int count;
+	int status;
+	int ncpus;
+	int n;
+	long *countr;
+
+	countr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0);
 
 	Av0 = av[0];
-	if (ac == 2)
+	if (ac == 2 && strcmp(av[1], "dummy") == 0)
 		exit(0);
+	ncpus = 1;
+	if (ac > 1)
+		ncpus = strtol(av[1], NULL, 0);
 
 	count = 0;
 	start_timing();
@@ -61,12 +70,24 @@
 	}
 	count *= 5;	/* 5 second run */
 	start_timing();
-	for (i = 0; i < count; ++i)
-		execltest();
+	for (n = 0; n < ncpus; ++n) {
+		if (fork() == 0) {
+			count = 0;
+			while (get_timing() < 5000000) {
+				execltest();
+				++count;
+				stop_timing(0, NULL);
+			}
+			atomic_add_long(countr, count);
+			_exit(0);
+		}
+	}
+	while (wait3(&status, 0, NULL) >= 0 || errno == EINTR)
+		;
 #ifdef ISSTATIC
-	stop_timing(count, "execl static program:", count);
+	stop_timing(*countr, "execl static program:");
 #else
-	stop_timing(count, "execl dynamic program:", count);
+	stop_timing(*countr, "execl dynamic program:");
 #endif
 	return(0);
 }
-- 
2.11.4.GIT
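
Note on the sizing arithmetic: the "around a megabyte" and "worst-case
100,000/second" figures follow directly from the new PIDSEL_DOMAINS macro.
The standalone user-space sketch below (not kernel code) reproduces the
calculation; PID_MAX = 999999 and the PIDDOM_* constants come from the
patch, while ALLPROC_HSIZE is given a placeholder value here because only
its role as a rounding factor matters for the estimate.

#include <stdio.h>

#define PID_MAX		999999	/* from the commit message */
#define ALLPROC_HSIZE	1024	/* placeholder; real value lives in the kernel headers */
#define PIDDOM_COUNT	10	/* pids per domain */
#define PIDDOM_DELAY	10	/* seconds a freed domain is skipped before reuse */
#define PIDDOM_SCALE	10	/* (10,000*SCALE)/sec performance guarantee */
#define PIDSEL_DOMAINS	(PID_MAX * PIDDOM_SCALE / PIDDOM_COUNT / \
			 ALLPROC_HSIZE * ALLPROC_HSIZE)

int
main(void)
{
	/* One uint8_t timestamp per domain -> roughly 1MB of storage. */
	printf("pid_doms[] size: %d bytes\n", PIDSEL_DOMAINS);

	/*
	 * Worst case, every allocation lands in a freshly stamped domain,
	 * so at most PIDSEL_DOMAINS pids can be issued per PIDDOM_DELAY
	 * window: ~1,000,000 / 10 ~= 100,000 execs/sec.
	 */
	printf("worst-case exec rate: %d/sec\n", PIDSEL_DOMAINS / PIDDOM_DELAY);
	return 0;
}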
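
Note on the reworked exec1 test: each forked worker runs its own timing
loop and publishes its count through a page of MAP_ANON|MAP_SHARED memory.
The following is a minimal sketch of that counter pattern only, assuming a
POSIX-ish environment; it substitutes C11 atomic_fetch_add() for
DragonFly's atomic_add_long(), a fixed worker count for the ncpus
argument, and a dummy increment where the real test loops on execltest().

#include <sys/mman.h>
#include <sys/wait.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	atomic_long *countr;
	int nworkers = 4;	/* stands in for the ncpus argument */
	int status;
	int n;

	/* One shared anonymous page; forked children inherit the mapping. */
	countr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		      MAP_ANON | MAP_SHARED, -1, 0);
	if (countr == MAP_FAILED)
		return 1;

	for (n = 0; n < nworkers; ++n) {
		if (fork() == 0) {
			/* A real worker would loop on execltest() here. */
			atomic_fetch_add(countr, 1000);
			_exit(0);
		}
	}
	while (wait(&status) >= 0)	/* reap all workers */
		;
	printf("total iterations: %ld\n", atomic_load(countr));
	return 0;
}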