From 81d43577d1b5e76e6016ba642ecc1a76fde43021 Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Wed, 16 Jul 2014 10:31:40 -0700 Subject: [PATCH] 5004 load average should be virtualized for zones Reviewed by: Robert Mustacchi Reviewed by: Igor Kozhukhov Reviewed by: Garrett D'Amore Approved by: Dan McDonald --- usr/src/uts/common/os/clock.c | 5 ++ usr/src/uts/common/os/kstat_fr.c | 9 +--- usr/src/uts/common/os/zone.c | 94 +++++++++++++++++++++++++++++++++ usr/src/uts/common/sys/zone.h | 9 ++++ usr/src/uts/common/syscall/getloadavg.c | 14 +---- 5 files changed, 111 insertions(+), 20 deletions(-) diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c index 70286146d9..3421ae28d1 100644 --- a/usr/src/uts/common/os/clock.c +++ b/usr/src/uts/common/os/clock.c @@ -67,6 +67,7 @@ #include #include #include +#include /* * for NTP support @@ -1149,6 +1150,10 @@ loadavg_update() } while ((cpupart = cpupart->cp_next) != cp_list_head); + /* + * Third pass totals up per-zone statistics. 
+ */ + zone_loadavg_update(); } /* diff --git a/usr/src/uts/common/os/kstat_fr.c b/usr/src/uts/common/os/kstat_fr.c index 31ba702346..3f055e6cde 100644 --- a/usr/src/uts/common/os/kstat_fr.c +++ b/usr/src/uts/common/os/kstat_fr.c @@ -806,7 +806,6 @@ system_misc_kstat_update(kstat_t *ksp, int rw) { int myncpus = ncpus; int *loadavgp = &avenrun[0]; - int loadavg[LOADAVG_NSTATS]; time_t zone_boot_time; clock_t zone_lbolt; hrtime_t zone_hrtime; @@ -823,17 +822,11 @@ system_misc_kstat_update(kstat_t *ksp, int rw) */ mutex_enter(&cpu_lock); if (pool_pset_enabled()) { - psetid_t mypsid = zone_pset_get(curproc->p_zone); - int error; - myncpus = zone_ncpus_get(curproc->p_zone); ASSERT(myncpus > 0); - error = cpupart_get_loadavg(mypsid, &loadavg[0], - LOADAVG_NSTATS); - ASSERT(error == 0); - loadavgp = &loadavg[0]; } mutex_exit(&cpu_lock); + loadavgp = &curproc->p_zone->zone_avenrun[0]; } if (INGLOBALZONE(curproc)) { diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 84467a6659..444b455e5e 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -1841,6 +1841,10 @@ zone_misc_kstat_update(kstat_t *ksp, int rw) scalehrtime(&tmp); zmp->zm_wtime.value.ui64 = tmp; + zmp->zm_avenrun1.value.ui32 = zone->zone_avenrun[0]; + zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1]; + zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2]; + return (0); } @@ -1870,6 +1874,10 @@ zone_misc_kstat_create(zone_t *zone) kstat_named_init(&zmp->zm_utime, "nsec_user", KSTAT_DATA_UINT64); kstat_named_init(&zmp->zm_stime, "nsec_sys", KSTAT_DATA_UINT64); kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_avenrun1, "avenrun_1min", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min", + KSTAT_DATA_UINT32); ksp->ks_update = zone_misc_kstat_update; ksp->ks_private = zone; @@ -3050,6 +3058,92 @@ zone_find_by_path(const 
char *path) } /* + * Public interface for updating per-zone load averages. Called once per + * second. Walks zone_active under zonehash_lock, taking each zone_lock. + * + * Based on loadavg_update(), genloadavg() and calcloadavg() from clock.c. + */ +void +zone_loadavg_update() +{ + zone_t *zp; + zone_status_t status; + struct loadavg_s *lavg; + hrtime_t zone_total; + int i; + hrtime_t hr_avg; + int nrun; + static int64_t f[3] = { 135, 27, 9 }; + int64_t q, r; + + mutex_enter(&zonehash_lock); + for (zp = list_head(&zone_active); zp != NULL; + zp = list_next(&zone_active, zp)) { + mutex_enter(&zp->zone_lock); + + /* Skip zones that are not yet up or are on the way down. */ + status = zone_status_get(zp); + if (status < ZONE_IS_READY || status >= ZONE_IS_DOWN) { + /* For all practical purposes the zone doesn't exist. */ + mutex_exit(&zp->zone_lock); + continue; + } + + /* + * Update the 10 second moving average data in zone_loadavg. + */ + lavg = &zp->zone_loadavg; + + zone_total = zp->zone_utime + zp->zone_stime + zp->zone_wtime; + scalehrtime(&zone_total); + + /* zone_total should only grow; store 0 if it did not. */ + lavg->lg_loads[lavg->lg_cur] = (zone_total > lavg->lg_total) ? + zone_total - lavg->lg_total : 0; + lavg->lg_cur = (lavg->lg_cur + 1) % S_LOADAVG_SZ; + /* lg_total holds the previous 1 second total */ + lavg->lg_total = zone_total; + + /* + * To simplify the calculation, we don't calculate the load avg. + * until the zone has been up for at least 10 seconds and our + * moving average is thus full. + */ + if ((lavg->lg_len + 1) < S_LOADAVG_SZ) { + lavg->lg_len++; + mutex_exit(&zp->zone_lock); + continue; + } + + /* Average all S_LOADAVG_SZ samples, then scale into nrun. */ + hr_avg = 0; + for (i = 0; i < S_LOADAVG_SZ; i++) + hr_avg += lavg->lg_loads[i]; + hr_avg = hr_avg / S_LOADAVG_SZ; + nrun = hr_avg / (NANOSEC / LGRP_LOADAVG_IN_THREAD_MAX); + + /* Compute 1/5/15 min load avg; 
see comment in calcloadavg() */ + for (i = 0; i < 3; i++) { + q = (zp->zone_hp_avenrun[i] >> 16) << 7; + r = (zp->zone_hp_avenrun[i] & 0xffff) << 7; + zp->zone_hp_avenrun[i] += + ((nrun - q) * f[i] - ((r * f[i]) >> 16)) >> 4; + + /* avenrun[] can only hold 31 bits of load avg. */ + if (zp->zone_hp_avenrun[i] < + ((uint64_t)1<<(31+16-FSHIFT))) + zp->zone_avenrun[i] = (int32_t) + (zp->zone_hp_avenrun[i] >> (16 - FSHIFT)); + else + zp->zone_avenrun[i] = 0x7fffffff; + } + + mutex_exit(&zp->zone_lock); + } + mutex_exit(&zonehash_lock); +} + +/* * Get the number of cpus visible to this zone. The system-wide global * 'ncpus' is returned if pools are disabled, the caller is in the * global zone, or a NULL zone argument is passed in. diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 18c2a9ef4b..6d6d324843 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -321,6 +321,7 @@ typedef struct zone_net_data { * libraries which may be defining ther own versions. */ #include +#include #define GLOBAL_ZONEUNIQID 0 /* uniqid of the global zone */ @@ -383,6 +384,9 @@ typedef struct { kstat_named_t zm_utime; kstat_named_t zm_stime; kstat_named_t zm_wtime; + kstat_named_t zm_avenrun1; + kstat_named_t zm_avenrun5; + kstat_named_t zm_avenrun15; } zone_misc_kstat_t; typedef struct zone { @@ -565,6 +569,10 @@ typedef struct zone { uint64_t zone_utime; /* total user time */ uint64_t zone_wtime; /* total time waiting in runq */ + struct loadavg_s zone_loadavg; /* loadavg for this zone */ + uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */ + int zone_avenrun[3]; /* FSCALED avg. 
run queue len */ + /* * DTrace-private per-zone state */ @@ -604,6 +612,7 @@ extern zoneid_t getzoneid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); extern int zone_check_datalink(zoneid_t *, datalink_id_t); +extern void zone_loadavg_update(); /* * Zone-specific data (ZSD) APIs diff --git a/usr/src/uts/common/syscall/getloadavg.c b/usr/src/uts/common/syscall/getloadavg.c index c669f9b8ba..0f44064e90 100644 --- a/usr/src/uts/common/syscall/getloadavg.c +++ b/usr/src/uts/common/syscall/getloadavg.c @@ -22,10 +22,9 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -41,7 +40,6 @@ int getloadavg(int *buf, int nelem) { int *loadbuf = &avenrun[0]; - int loadavg[LOADAVG_NSTATS]; int error; if (nelem < 0) @@ -50,15 +48,7 @@ getloadavg(int *buf, int nelem) nelem = LOADAVG_NSTATS; if (!INGLOBALZONE(curproc)) { - mutex_enter(&cpu_lock); - if (pool_pset_enabled()) { - psetid_t psetid = zone_pset_get(curproc->p_zone); - - error = cpupart_get_loadavg(psetid, &loadavg[0], nelem); - ASSERT(error == 0); /* pset isn't going anywhere */ - loadbuf = &loadavg[0]; - } - mutex_exit(&cpu_lock); + loadbuf = &curproc->p_zone->zone_avenrun[0]; } error = copyout(loadbuf, buf, nelem * sizeof (avenrun[0])); -- 2.11.4.GIT