From 77bc82e1e8344e7ddfdbfbd121e60efb1fbfa686 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 29 Jul 2017 10:30:25 -0700 Subject: [PATCH] kernel - Add a sampling history mechanism called kcollect (2) * Add collection code for remaining base statistics. * Round-up some calculations. --- sys/kern/kern_clock.c | 59 +++++++++++++++++++++++++++++- sys/kern/kern_collect.c | 12 ++++++ sys/kern/kern_synch.c | 4 +- sys/kern/vfs_nlookup.c | 35 ++++++++++++++++++ sys/sys/kcollect.h | 46 +++++++++++++++++------ sys/vm/swap_pager.c | 33 ++++++++++++++++- sys/vm/vm_meter.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 270 insertions(+), 16 deletions(-) diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 7bf2bc1c99..6b423c286f 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -85,12 +85,14 @@ #include #include #include -#include #include +#include +#include + +#include #include #include #include -#include #include #include @@ -349,6 +351,49 @@ initclocks_pcpu(void) } /* + * Called on a 10-second interval after the system is operational. + * Return the collection data for USERPCT and install the data for + * SYSTPCT and IDLEPCT. 
+ */ +static +uint64_t +collect_cputime_callback(int n) +{ + static long cpu_base[CPUSTATES]; + long cpu_states[CPUSTATES]; + long total; + long acc; + long lsb; + + bzero(cpu_states, sizeof(cpu_states)); + for (n = 0; n < ncpus; ++n) { + cpu_states[CP_USER] += cputime_percpu[n].cp_user; + cpu_states[CP_NICE] += cputime_percpu[n].cp_nice; + cpu_states[CP_SYS] += cputime_percpu[n].cp_sys; + cpu_states[CP_INTR] += cputime_percpu[n].cp_intr; + cpu_states[CP_IDLE] += cputime_percpu[n].cp_idle; + } + + acc = 0; + for (n = 0; n < CPUSTATES; ++n) { + total = cpu_states[n] - cpu_base[n]; + cpu_base[n] = cpu_states[n]; + cpu_states[n] = total; + acc += total; + } + if (acc == 0) /* prevent degenerate divide by 0 */ + acc = 1; + lsb = acc / (10000 * 2); + kcollect_setvalue(KCOLLECT_SYSTPCT, + (cpu_states[CP_SYS] + lsb) * 10000 / acc); + kcollect_setvalue(KCOLLECT_IDLEPCT, + (cpu_states[CP_IDLE] + lsb) * 10000 / acc); + kcollect_setvalue(KCOLLECT_INTRPCT, + (cpu_states[CP_INTR] + lsb) * 10000 / acc); + return((cpu_states[CP_USER] + cpu_states[CP_NICE] + lsb) * 10000 / acc); +} + +/* * This routine is called on just the BSP, just after SMP initialization * completes to * finish initializing any clocks that might contend/block * (e.g. like on a token). 
We can't do this in initclocks_pcpu() because @@ -387,6 +432,16 @@ initclocks_other(void *dummy) NULL, ESTCPUFREQ); } lwkt_setcpu_self(ogd); + + /* + * Regular data collection + */ + kcollect_register(KCOLLECT_USERPCT, "user", collect_cputime_callback, + KCOLLECT_SCALE(KCOLLECT_USERPCT_FORMAT, 0)); + kcollect_register(KCOLLECT_SYSTPCT, "syst", NULL, + KCOLLECT_SCALE(KCOLLECT_SYSTPCT_FORMAT, 0)); + kcollect_register(KCOLLECT_IDLEPCT, "idle", NULL, + KCOLLECT_SCALE(KCOLLECT_IDLEPCT_FORMAT, 0)); } SYSINIT(clocks2, SI_BOOT2_POST_SMP, SI_ORDER_ANY, initclocks_other, NULL); diff --git a/sys/kern/kern_collect.c b/sys/kern/kern_collect.c index cd754ffb96..85fcdc75b0 100644 --- a/sys/kern/kern_collect.c +++ b/sys/kern/kern_collect.c @@ -130,6 +130,18 @@ kcollect_setvalue(int n, uint64_t value) } } +/* + * Callback to change scale adjustment, if necessary. Certain statistics + * have scale info available (such as KCOLLECT_SWAPANO and SWAPCAC). + */ +void +kcollect_setscale(int n, uint64_t value) +{ + if (n >= 0 && n < KCOLLECT_ENTRIES) { + kcollect_scale.data[n] = value; + } +} + static void kcollect_thread(void *dummy) diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 5ca105ad58..6023baf172 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1272,7 +1272,9 @@ loadav_count_runnable(struct lwp *lp, void *data) static uint64_t collect_load_callback(int n) { - return (averunnable.ldavg[0] * 100 / averunnable.fscale); + int fscale = averunnable.fscale; + + return ((averunnable.ldavg[0] * 100 + (fscale >> 1)) / fscale); } /* ARGSUSED */ diff --git a/sys/kern/vfs_nlookup.c b/sys/kern/vfs_nlookup.c index 3230cdf5a3..12e995730d 100644 --- a/sys/kern/vfs_nlookup.c +++ b/sys/kern/vfs_nlookup.c @@ -64,6 +64,7 @@ #include #include #include +#include #ifdef KTRACE #include @@ -1476,3 +1477,37 @@ naccess_va(struct vattr *va, int nflags, struct ucred *cred) return(0); } +/* + * Long-term (10-second interval) statistics collection + */ +static +uint64_t 
+collect_nlookup_callback(int n) +{ + static uint64_t last_total; + uint64_t save; + uint64_t total; + + total = 0; + for (n = 0; n < ncpus; ++n) { + globaldata_t gd = globaldata_find(n); + struct nchstats *sp; + + if ((sp = gd->gd_nchstats) != NULL) + total += sp->ncs_longhits + sp->ncs_longmiss; + } + save = total; + total = total - last_total; + last_total = save; + + return total; +} + +static +void +nlookup_collect_init(void *dummy __unused) +{ + kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, + KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); +} +SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); diff --git a/sys/sys/kcollect.h b/sys/sys/kcollect.h index d3fe82b9c9..05a20ee453 100644 --- a/sys/sys/kcollect.h +++ b/sys/sys/kcollect.h @@ -35,34 +35,55 @@ typedef struct { #define KCOLLECT_USERPCT 1 /* whole machine user % */ #define KCOLLECT_SYSTPCT 2 /* whole machine sys % */ #define KCOLLECT_IDLEPCT 3 /* whole machine idle % */ -#define KCOLLECT_SWAPPCT 4 /* total swap used % */ -#define KCOLLECT_SWAPANO 5 /* anonymous swap used MB */ -#define KCOLLECT_SWAPCAC 6 /* swapcache swap used MB */ -#define KCOLLECT_VMFAULT 7 /* all vm faults incl zero-fill */ -#define KCOLLECT_ZFILL 8 /* zero-fill faults */ -#define KCOLLECT_MEMFRE 9 /* amount of free memory, bytes */ -#define KCOLLECT_MEMCAC 10 /* amount of almost free memory */ -#define KCOLLECT_MEMINA 11 /* amount of inactive memory */ -#define KCOLLECT_MEMACT 12 /* amount of active memory */ -#define KCOLLECT_MEMWIR 13 /* amount of wired/kernel memory */ - -#define KCOLLECT_DYNAMIC_START 16 /* dynamic entries */ +#define KCOLLECT_INTRPCT 4 /* whole machine intr % (or other) */ +#define KCOLLECT_SWAPPCT 5 /* total swap used % */ +#define KCOLLECT_SWAPANO 6 /* anonymous swap used MB */ +#define KCOLLECT_SWAPCAC 7 /* swapcache swap used MB */ + +#define KCOLLECT_VMFAULT 8 /* all vm faults incl zero-fill */ +#define KCOLLECT_COWFAULT 9 /* copy-on-write faults */ 
+#define KCOLLECT_ZFILL 10 /* zero-fill faults */ + +#define KCOLLECT_MEMFRE 11 /* amount of free memory, bytes */ +#define KCOLLECT_MEMCAC 12 /* amount of almost free memory */ +#define KCOLLECT_MEMINA 13 /* amount of inactive memory */ +#define KCOLLECT_MEMACT 14 /* amount of active memory */ +#define KCOLLECT_MEMWIR 15 /* amount of wired/kernel memory */ + +#define KCOLLECT_SYSCALLS 16 /* system calls */ +#define KCOLLECT_NLOOKUP 17 /* path lookups */ + +#define KCOLLECT_INTR 18 /* nominal external interrupts */ +#define KCOLLECT_IPI 19 /* inter-cpu interrupts */ +#define KCOLLECT_TIMER 20 /* timer interrupts */ + +#define KCOLLECT_DYNAMIC_START 24 /* dynamic entries */ #define KCOLLECT_LOAD_FORMAT '2' /* N.NN (modulo 100) */ #define KCOLLECT_USERPCT_FORMAT 'p' /* percentage of single cpu x 100 */ #define KCOLLECT_SYSTPCT_FORMAT 'p' /* percentage of single cpu x 100 */ #define KCOLLECT_IDLEPCT_FORMAT 'p' /* percentage of single cpu x 100 */ + #define KCOLLECT_SWAPPCT_FORMAT 'p' /* percentage of single cpu x 100 */ #define KCOLLECT_SWAPANO_FORMAT 'm' /* in megabytes (1024*1024) */ #define KCOLLECT_SWAPCAC_FORMAT 'm' /* in megabytes (1024*1024) */ + #define KCOLLECT_VMFAULT_FORMAT 'c' /* count over period */ +#define KCOLLECT_COWFAULT_FORMAT 'c' /* count over period */ #define KCOLLECT_ZFILL_FORMAT 'c' /* count over period */ + #define KCOLLECT_MEMFRE_FORMAT 'b' /* total bytes */ #define KCOLLECT_MEMCAC_FORMAT 'b' /* total bytes */ #define KCOLLECT_MEMINA_FORMAT 'b' /* total bytes */ #define KCOLLECT_MEMACT_FORMAT 'b' /* total bytes */ #define KCOLLECT_MEMWIR_FORMAT 'b' /* total bytes */ +#define KCOLLECT_SYSCALLS_FORMAT 'c' /* count over period */ +#define KCOLLECT_NLOOKUP_FORMAT 'c' /* count over period */ +#define KCOLLECT_INTR_FORMAT 'c' /* count over period */ +#define KCOLLECT_IPI_FORMAT 'c' /* count over period */ +#define KCOLLECT_TIMER_FORMAT 'c' /* count over period */ + #define KCOLLECT_SCALE(fmt, scale) ((fmt) | ((uint64_t)(scale) << 8)) #define 
KCOLLECT_GETFMT(scale) ((char)(scale)) #define KCOLLECT_GETSCALE(scale) ((scale) >> 8) @@ -75,6 +96,7 @@ int kcollect_register(int which, const char *id, kcallback_t func, uint64_t scale); void kcollect_unregister(int n); void kcollect_setvalue(int n, uint64_t value); +void kcollect_setscale(int n, uint64_t value); #endif diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 934c565f96..c52ec2f217 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -106,7 +106,7 @@ #include #include #include -#include +#include #include #include "opt_swap.h" @@ -120,6 +120,7 @@ #include #include +#include #include #include @@ -328,6 +329,30 @@ swp_sizecheck(void) } /* + * Long-term data collection on 10-second interval. Return the value + * for KCOLLECT_SWAPPCT and set the values for SWAPANO and SWAPCAC. + * + * Return total swap in the scale field. This can change if swap is + * regularly added or removed and may cause some historical confusion + * in that case, but SWAPPCT will always be historically accurate. + */ +static uint64_t +collect_swap_callback(int n) +{ + uint64_t total = vm_swap_size; + uint64_t anon = vm_swap_anon_use; + uint64_t cache = vm_swap_cache_use; + + if (total == 0) /* avoid divide by zero */ + total = 1; + kcollect_setvalue(KCOLLECT_SWAPANO, anon * PAGE_SIZE); + kcollect_setvalue(KCOLLECT_SWAPCAC, cache * PAGE_SIZE); + kcollect_setscale(KCOLLECT_SWAPANO, total); + kcollect_setscale(KCOLLECT_SWAPCAC, total); + return (((anon + cache) * 10000 + (total >> 1)) / total); +} + +/* + * SWAP_PAGER_INIT() - initialize the swap pager! + * + * Expected to be started from system init. 
NOTE: This code is run @@ -339,6 +364,12 @@ swp_sizecheck(void) static void swap_pager_init(void *arg __unused) { + kcollect_register(KCOLLECT_SWAPPCT, "swapuse", collect_swap_callback, + KCOLLECT_SCALE(KCOLLECT_SWAPPCT_FORMAT, 0)); + kcollect_register(KCOLLECT_SWAPANO, "swapmem", NULL, + KCOLLECT_SCALE(KCOLLECT_SWAPANO_FORMAT, 0)); + kcollect_register(KCOLLECT_SWAPCAC, "swapcsh", NULL, + KCOLLECT_SCALE(KCOLLECT_SWAPCAC_FORMAT, 0)); } SYSINIT(vm_mem, SI_BOOT1_VM, SI_ORDER_THIRD, swap_pager_init, NULL); diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 919fd4512c..9be8788d03 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -442,6 +443,74 @@ do_vmmeter_pcpu(SYSCTL_HANDLER_ARGS) } /* + * Callback for long-term slow data collection on 10-second interval. + * + * Return faults, set data for other entries. + */ +static uint64_t +collect_vmstats_callback(int n) +{ + static struct vmmeter last_vmm; + struct vmmeter cur_vmm; + const int boffset = offsetof(struct vmmeter, vmmeter_uint_begin); + const int eoffset = offsetof(struct vmmeter, vmmeter_uint_end); + uint64_t total; + + /* + * The hardclock already rolls up vmstats for us. + */ + kcollect_setvalue(KCOLLECT_MEMFRE, vmstats.v_free_count); + kcollect_setvalue(KCOLLECT_MEMCAC, vmstats.v_cache_count); + kcollect_setvalue(KCOLLECT_MEMINA, vmstats.v_inactive_count); + kcollect_setvalue(KCOLLECT_MEMACT, vmstats.v_active_count); + kcollect_setvalue(KCOLLECT_MEMWIR, vmstats.v_wire_count); + + /* + * Collect pcpu statistics for things like faults. 
+ */ + bzero(&cur_vmm, sizeof(cur_vmm)); + for (n = 0; n < ncpus; ++n) { + struct globaldata *gd = globaldata_find(n); + int off; + + for (off = boffset; off <= eoffset; off += sizeof(u_int)) { + *(u_int *)((char *)&cur_vmm + off) += + *(u_int *)((char *)&gd->gd_cnt + off); + } + + } + + total = cur_vmm.v_cow_faults - last_vmm.v_cow_faults; + last_vmm.v_cow_faults = cur_vmm.v_cow_faults; + kcollect_setvalue(KCOLLECT_COWFAULT, total); + + total = cur_vmm.v_zfod - last_vmm.v_zfod; + last_vmm.v_zfod = cur_vmm.v_zfod; + kcollect_setvalue(KCOLLECT_ZFILL, total); + + total = cur_vmm.v_syscall - last_vmm.v_syscall; + last_vmm.v_syscall = cur_vmm.v_syscall; + kcollect_setvalue(KCOLLECT_SYSCALLS, total); + + total = cur_vmm.v_intr - last_vmm.v_intr; + last_vmm.v_intr = cur_vmm.v_intr; + kcollect_setvalue(KCOLLECT_INTR, total); + + total = cur_vmm.v_ipi - last_vmm.v_ipi; + last_vmm.v_ipi = cur_vmm.v_ipi; + kcollect_setvalue(KCOLLECT_IPI, total); + + total = cur_vmm.v_timer - last_vmm.v_timer; + last_vmm.v_timer = cur_vmm.v_timer; + kcollect_setvalue(KCOLLECT_TIMER, total); + + total = cur_vmm.v_vm_faults - last_vmm.v_vm_faults; + last_vmm.v_vm_faults = cur_vmm.v_vm_faults; + + return total; +} + +/* * Called from the low level boot code only. 
*/ static void @@ -468,6 +537,34 @@ vmmeter_init(void *dummy __unused) gd, sizeof(struct vmmeter), do_vmmeter_pcpu, "S,vmmeter", "System per-cpu statistics"); } + kcollect_register(KCOLLECT_VMFAULT, "fault", collect_vmstats_callback, + KCOLLECT_SCALE(KCOLLECT_VMFAULT_FORMAT, + vmstats.v_page_count)); + kcollect_register(KCOLLECT_COWFAULT, "cow", NULL, + KCOLLECT_SCALE(KCOLLECT_COWFAULT_FORMAT, 0)); + kcollect_register(KCOLLECT_ZFILL, "zfill", NULL, + KCOLLECT_SCALE(KCOLLECT_ZFILL_FORMAT, 0)); + + kcollect_register(KCOLLECT_MEMFRE, "free", NULL, + KCOLLECT_SCALE(KCOLLECT_MEMFRE_FORMAT, 0)); + kcollect_register(KCOLLECT_MEMCAC, "cache", NULL, + KCOLLECT_SCALE(KCOLLECT_MEMCAC_FORMAT, 0)); + kcollect_register(KCOLLECT_MEMINA, "inact", NULL, + KCOLLECT_SCALE(KCOLLECT_MEMINA_FORMAT, 0)); + kcollect_register(KCOLLECT_MEMACT, "act", NULL, + KCOLLECT_SCALE(KCOLLECT_MEMACT_FORMAT, 0)); + kcollect_register(KCOLLECT_MEMWIR, "wired", NULL, + KCOLLECT_SCALE(KCOLLECT_MEMWIR_FORMAT, 0)); + + kcollect_register(KCOLLECT_SYSCALLS, "syscalls", NULL, + KCOLLECT_SCALE(KCOLLECT_SYSCALLS_FORMAT, 0)); + + kcollect_register(KCOLLECT_INTR, "intr", NULL, + KCOLLECT_SCALE(KCOLLECT_INTR_FORMAT, 0)); + kcollect_register(KCOLLECT_IPI, "ipi", NULL, + KCOLLECT_SCALE(KCOLLECT_IPI_FORMAT, 0)); + kcollect_register(KCOLLECT_TIMER, "timer", NULL, + KCOLLECT_SCALE(KCOLLECT_TIMER_FORMAT, 0)); } SYSINIT(vmmeter, SI_SUB_PSEUDO, SI_ORDER_ANY, vmmeter_init, 0); -- 2.11.4.GIT