4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
24 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
36 #include <libcontract.h>
40 #include <zonestat_impl.h>
54 #include <sys/acctctl.h>
55 #include <sys/contract/process.h>
58 #include <sys/param.h>
59 #include <sys/priocntl.h>
60 #include <sys/fxpriocntl.h>
61 #include <sys/processor.h>
63 #include <sys/socket.h>
65 #include <sys/statvfs.h>
67 #include <sys/systeminfo.h>
71 #include <sys/types.h>
72 #include <sys/vm_usage.h>
82 #define MAX_PSET_NAME 1024 /* Taken from PV_NAME_MAX_LEN */
83 #define ZSD_PSET_UNLIMITED UINT16_MAX
84 #define ZONESTAT_EXACCT_FILE "/var/adm/exacct/zonestat-process"
87 * zonestatd implements gathering cpu and memory utilization data for
88 * running zones. It has these components:
91 * Door server to respond to client connections. Each client
92 * will connect using libzonestat.so, which will open and
93 * call /var/tmp/.zonestat_door. Each connecting client is given
94 * a file descriptor to the stat server.
96 * The zsd_server also responds to zoneadmd, which reports when a
97 * new zone is booted. This is used to fattach the zsd_server door
101 * Receives client requests for the current utilization data. Each
102 * client request will cause zonestatd to update the current utilization
103 * data by kicking the stat_thread.
105 * If the client is in a non-global zone, the utilization data will
106 * be filtered to only show the given zone. The usage by all other zones
107 * will be added to the system utilization.
110 * The stat thread implements querying the system to determine the
111 * current utilization data for each running zone. This includes
112 * inspecting the system's processor set configuration, as well as details
113 * of each zone, such as their configured limits, and which processor
114 * sets they are running in.
116 * The stat_thread will only update memory utilization data as often as
117 * the configured config/sample_interval on the zones-monitoring service.
121 * The private vmusage structure unfortunately uses size_t types, and assumes
122 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
123 * system call is contracted, and zonestatd is 32 bit, the following structures
124 * are used to interact with a 32bit or 64 bit kernel.
126 typedef struct zsd_vmusage32
{
131 uint32_t vmu_rss_all
;
132 uint32_t vmu_rss_private
;
133 uint32_t vmu_rss_shared
;
134 uint32_t vmu_swap_all
;
135 uint32_t vmu_swap_private
;
136 uint32_t vmu_swap_shared
;
139 typedef struct zsd_vmusage64
{
144 * An amd64 kernel will align the following uint64_t members, but a
145 * 32bit i386 process will not without help.
147 int vmu_align_next_members_on_8_bytes
;
148 uint64_t vmu_rss_all
;
149 uint64_t vmu_rss_private
;
150 uint64_t vmu_rss_shared
;
151 uint64_t vmu_swap_all
;
152 uint64_t vmu_swap_private
;
153 uint64_t vmu_swap_shared
;
158 /* Used to store a zone's usage of a pset */
159 typedef struct zsd_pset_usage
{
160 struct zsd_zone
*zsu_zone
;
161 struct zsd_pset
*zsu_pset
;
163 list_node_t zsu_next
;
166 boolean_t zsu_found
; /* zone bound at end of interval */
167 boolean_t zsu_active
; /* zone was bound during interval */
168 boolean_t zsu_new
; /* zone newly bound in this interval */
169 boolean_t zsu_deleted
; /* zone was unbound in this interval */
170 boolean_t zsu_empty
; /* no procs in pset in this interval */
171 time_t zsu_start
; /* time when zone was found in pset */
172 hrtime_t zsu_hrstart
; /* time when zone was found in pset */
173 uint64_t zsu_cpu_shares
;
174 uint_t zsu_scheds
; /* schedulers found in this pass */
175 timestruc_t zsu_cpu_usage
; /* cpu time used */
178 /* Used to store a pset's utilization */
179 typedef struct zsd_pset
{
181 list_node_t zsp_next
;
182 char zsp_name
[ZS_PSETNAME_MAX
];
184 uint_t zsp_cputype
; /* default, dedicated or shared */
185 boolean_t zsp_found
; /* pset found at end of interval */
186 boolean_t zsp_new
; /* pset new in this interval */
187 boolean_t zsp_deleted
; /* pset deleted in this interval */
188 boolean_t zsp_active
; /* pset existed during interval */
189 boolean_t zsp_empty
; /* no processes in pset */
191 hrtime_t zsp_hrstart
;
193 uint64_t zsp_online
; /* online cpus in interval */
194 uint64_t zsp_size
; /* size in this interval */
195 uint64_t zsp_min
; /* configured min in this interval */
196 uint64_t zsp_max
; /* configured max in this interval */
197 int64_t zsp_importance
; /* configured importance in this interval */
199 uint_t zsp_scheds
; /* scheds of processes found in pset */
200 uint64_t zsp_cpu_shares
; /* total shares in this interval */
202 timestruc_t zsp_total_time
;
203 timestruc_t zsp_usage_kern
;
204 timestruc_t zsp_usage_zones
;
206 /* Individual zone usages of pset */
207 list_t zsp_usage_list
;
210 /* Summed kstat values from individual cpus in pset */
211 timestruc_t zsp_idle
;
212 timestruc_t zsp_intr
;
213 timestruc_t zsp_kern
;
214 timestruc_t zsp_user
;
218 /* Used to track an individual cpu's utilization as reported by kstats */
219 typedef struct zsd_cpu
{
220 processorid_t zsc_id
;
221 list_node_t zsc_next
;
223 psetid_t zsc_psetid_prev
;
224 zsd_pset_t
*zsc_pset
;
226 boolean_t zsc_found
; /* cpu online in this interval */
227 boolean_t zsc_onlined
; /* cpu onlined during this interval */
228 boolean_t zsc_offlined
; /* cpu offlined during this interval */
229 boolean_t zsc_active
; /* cpu online during this interval */
230 boolean_t zsc_allocated
; /* True if cpu has ever been found */
232 /* kstats this interval */
233 uint64_t zsc_nsec_idle
;
234 uint64_t zsc_nsec_intr
;
235 uint64_t zsc_nsec_kern
;
236 uint64_t zsc_nsec_user
;
238 /* kstats in most recent interval */
239 uint64_t zsc_nsec_idle_prev
;
240 uint64_t zsc_nsec_intr_prev
;
241 uint64_t zsc_nsec_kern_prev
;
242 uint64_t zsc_nsec_user_prev
;
244 /* Total kstat increases since zonestatd started reading kstats */
245 timestruc_t zsc_idle
;
246 timestruc_t zsc_intr
;
247 timestruc_t zsc_kern
;
248 timestruc_t zsc_user
;
252 /* Used to describe an individual zone and its utilization */
253 typedef struct zsd_zone
{
255 list_node_t zsz_next
;
256 char zsz_name
[ZS_ZONENAME_MAX
];
260 hrtime_t zsz_hrstart
;
262 char zsz_pool
[ZS_POOLNAME_MAX
];
263 char zsz_pset
[ZS_PSETNAME_MAX
];
264 int zsz_default_sched
;
265 /* These are deduced by inspecting processes */
269 boolean_t zsz_new
; /* zone booted during this interval */
270 boolean_t zsz_deleted
; /* halted during this interval */
271 boolean_t zsz_active
; /* running in this interval */
272 boolean_t zsz_empty
; /* no processes in this interval */
273 boolean_t zsz_gone
; /* not installed in this interval */
274 boolean_t zsz_found
; /* Running at end of this interval */
276 uint64_t zsz_cpu_shares
;
277 uint64_t zsz_cpu_cap
;
278 uint64_t zsz_ram_cap
;
279 uint64_t zsz_locked_cap
;
282 uint64_t zsz_cpus_online
;
283 timestruc_t zsz_cpu_usage
; /* cpu time of cpu cap */
284 timestruc_t zsz_cap_time
; /* cpu time of cpu cap */
285 timestruc_t zsz_share_time
; /* cpu time of share of cpu */
286 timestruc_t zsz_pset_time
; /* time of all psets zone is bound to */
288 uint64_t zsz_usage_ram
;
289 uint64_t zsz_usage_locked
;
290 uint64_t zsz_usage_vm
;
292 uint64_t zsz_processes_cap
;
293 uint64_t zsz_lwps_cap
;
294 uint64_t zsz_shm_cap
;
295 uint64_t zsz_shmids_cap
;
296 uint64_t zsz_semids_cap
;
297 uint64_t zsz_msgids_cap
;
298 uint64_t zsz_lofi_cap
;
300 uint64_t zsz_processes
;
311 * Used to track the cpu usage of an individual process.
313 * zonestatd sweeps /proc each interval and charges the cpu usage of processes.
314 * to their zone. As processes exit, their extended accounting records are
315 * read and the difference of their total and known usage is charged to their
318 * If a process is never seen in /proc, the total usage on its extended
319 * accounting record will be charged to its zone.
321 typedef struct zsd_proc
{
322 list_node_t zspr_next
;
324 psetid_t zspr_psetid
;
325 zoneid_t zspr_zoneid
;
327 timestruc_t zspr_usage
;
330 /* Used to track the overall resource usage of the system */
331 typedef struct zsd_system
{
333 uint64_t zss_ram_total
;
334 uint64_t zss_ram_kern
;
335 uint64_t zss_ram_zones
;
337 uint64_t zss_locked_kern
;
338 uint64_t zss_locked_zones
;
340 uint64_t zss_vm_total
;
341 uint64_t zss_vm_kern
;
342 uint64_t zss_vm_zones
;
344 uint64_t zss_swap_total
;
345 uint64_t zss_swap_used
;
347 timestruc_t zss_idle
;
348 timestruc_t zss_intr
;
349 timestruc_t zss_kern
;
350 timestruc_t zss_user
;
352 timestruc_t zss_cpu_total_time
;
353 timestruc_t zss_cpu_usage_kern
;
354 timestruc_t zss_cpu_usage_zones
;
357 uint64_t zss_processes_max
;
358 uint64_t zss_lwps_max
;
359 uint64_t zss_shm_max
;
360 uint64_t zss_shmids_max
;
361 uint64_t zss_semids_max
;
362 uint64_t zss_msgids_max
;
363 uint64_t zss_lofi_max
;
365 uint64_t zss_processes
;
374 uint64_t zss_ncpus_online
;
379 * A dumping ground for various information and structures used to compute
382 * This structure is used to track the system while clients are connected.
383 * When The first client connects, a zsd_ctl is allocated and configured by
384 * zsd_open(). When all clients disconnect, the zsd_ctl is closed.
386 typedef struct zsd_ctl
{
387 kstat_ctl_t
*zsctl_kstat_ctl
;
389 /* To track extended accounting */
390 int zsctl_proc_fd
; /* Log currently being used */
391 ea_file_t zsctl_proc_eaf
;
392 struct stat64 zsctl_proc_stat
;
394 int zsctl_proc_fd_next
; /* Log file to use next */
395 ea_file_t zsctl_proc_eaf_next
;
396 struct stat64 zsctl_proc_stat_next
;
397 int zsctl_proc_open_next
;
399 /* pool configuration handle */
400 pool_conf_t
*zsctl_pool_conf
;
401 int zsctl_pool_status
;
402 int zsctl_pool_changed
;
404 /* The above usage tracking structures */
405 zsd_system_t
*zsctl_system
;
409 zsd_cpu_t
*zsctl_cpu_array
;
410 zsd_proc_t
*zsctl_proc_array
;
412 /* Various system info */
413 uint64_t zsctl_maxcpuid
;
414 uint64_t zsctl_maxproc
;
415 uint64_t zsctl_kern_bits
;
416 uint64_t zsctl_pagesize
;
418 /* Used to track time available under a cpu cap. */
419 uint64_t zsctl_hrtime
;
420 uint64_t zsctl_hrtime_prev
;
421 timestruc_t zsctl_hrtime_total
;
423 struct timeval zsctl_timeofday
;
425 /* Caches for arrays allocated for use by various system calls */
426 psetid_t
*zsctl_pset_cache
;
427 uint_t zsctl_pset_ncache
;
428 processorid_t
*zsctl_cpu_cache
;
429 uint_t zsctl_cpu_ncache
;
430 zoneid_t
*zsctl_zone_cache
;
431 uint_t zsctl_zone_ncache
;
432 struct swaptable
*zsctl_swap_cache
;
433 uint64_t zsctl_swap_cache_size
;
434 uint64_t zsctl_swap_cache_num
;
435 zsd_vmusage64_t
*zsctl_vmusage_cache
;
436 uint64_t zsctl_vmusage_cache_num
;
438 /* Info about procfs for scanning /proc */
439 struct dirent
*zsctl_procfs_dent
;
440 long zsctl_procfs_dent_size
;
441 pool_value_t
*zsctl_pool_vals
[3];
443 /* Counts on tracked entities */
446 uint_t zsctl_npset_usages
;
450 boolean_t g_open
; /* True if g_ctl is open */
451 int g_hasclient
; /* True if any clients are connected */
454 * The usage cache is updated by the stat_thread, and copied to clients by
455 * the zsd_stat_server. Mutex and cond are to synchronize between the
456 * stat_thread and the stat_server.
458 zs_usage_cache_t
*g_usage_cache
;
459 mutex_t g_usage_cache_lock
;
460 cond_t g_usage_cache_kick
;
461 uint_t g_usage_cache_kickers
;
462 cond_t g_usage_cache_wait
;
463 char *g_usage_cache_buf
;
464 uint_t g_usage_cache_bufsz
;
467 /* fds of door servers */
472 * Starting and current time. Used to throttle memory calculation, and to
473 * mark new zones and psets with their boot and creation time.
/*
 * Print a non-fatal warning on stderr, prefixed with "zonestat: Warning: "
 * and terminated with a newline.  Accepts printf-style arguments.
 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);
	(void) fprintf(stderr, gettext("zonestat: Warning: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
}
/*
 * Print a fatal error on stderr, prefixed with "zonestat: Error: ",
 * then exit.  Accepts printf-style arguments.  Does not return.
 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);
	(void) fprintf(stderr, gettext("zonestat: Error: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
	exit(1);
}
515 /* Turns on extended accounting if not configured externally */
517 zsd_enable_cpu_stats()
519 char *path
= ZONESTAT_EXACCT_FILE
;
520 char oldfile
[MAXPATHLEN
];
521 int ret
, state
= AC_ON
;
525 * Start a new accounting file if accounting not configured
529 res
[0].ar_id
= AC_PROC_PID
;
530 res
[0].ar_state
= AC_ON
;
531 res
[1].ar_id
= AC_PROC_ANCPID
;
532 res
[1].ar_state
= AC_ON
;
533 res
[2].ar_id
= AC_PROC_CPU
;
534 res
[2].ar_state
= AC_ON
;
535 res
[3].ar_id
= AC_PROC_TIME
;
536 res
[3].ar_state
= AC_ON
;
537 res
[4].ar_id
= AC_PROC_ZONENAME
;
538 res
[4].ar_state
= AC_ON
;
539 res
[5].ar_id
= AC_NONE
;
540 res
[5].ar_state
= AC_ON
;
541 if (acctctl(AC_PROC
| AC_RES_SET
, res
, sizeof (res
)) != 0) {
542 zsd_warn(gettext("Unable to set accounting resources"));
545 /* Only set accounting file if none is configured */
546 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
550 if (acctctl(AC_PROC
| AC_FILE_SET
, path
, strlen(path
) + 1)
552 zsd_warn(gettext("Unable to set accounting file"));
556 if (acctctl(AC_PROC
| AC_STATE_SET
, &state
, sizeof (state
)) == -1) {
557 zsd_warn(gettext("Unable to enable accounting"));
563 /* Turns off extended accounting if not configured externally */
565 zsd_disable_cpu_stats()
567 char *path
= ZONESTAT_EXACCT_FILE
;
568 int ret
, state
= AC_OFF
;
570 char oldfile
[MAXPATHLEN
];
572 /* If accounting file is externally configured, leave it alone */
573 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
574 if (ret
== 0 && strcmp(oldfile
, path
) != 0)
577 res
[0].ar_id
= AC_PROC_PID
;
578 res
[0].ar_state
= AC_OFF
;
579 res
[1].ar_id
= AC_PROC_ANCPID
;
580 res
[1].ar_state
= AC_OFF
;
581 res
[2].ar_id
= AC_PROC_CPU
;
582 res
[2].ar_state
= AC_OFF
;
583 res
[3].ar_id
= AC_PROC_TIME
;
584 res
[3].ar_state
= AC_OFF
;
585 res
[4].ar_id
= AC_PROC_ZONENAME
;
586 res
[4].ar_state
= AC_OFF
;
587 res
[5].ar_id
= AC_NONE
;
588 res
[5].ar_state
= AC_OFF
;
589 if (acctctl(AC_PROC
| AC_RES_SET
, res
, sizeof (res
)) != 0) {
590 zsd_warn(gettext("Unable to clear accounting resources"));
593 if (acctctl(AC_PROC
| AC_FILE_SET
, NULL
, 0) == -1) {
594 zsd_warn(gettext("Unable to clear accounting file"));
597 if (acctctl(AC_PROC
| AC_STATE_SET
, &state
, sizeof (state
)) == -1) {
598 zsd_warn(gettext("Unable to diable accounting"));
607 * If not configured externally, deletes the current extended accounting file
608 * and starts a new one.
610 * Since the stat_thread holds an open handle to the accounting file, it will
611 * read all remaining entries from the old file before switching to
615 zsd_roll_exacct(void)
618 char *path
= ZONESTAT_EXACCT_FILE
;
619 char oldfile
[MAXPATHLEN
];
621 /* If accounting file is externally configured, leave it alone */
622 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
623 if (ret
== 0 && strcmp(oldfile
, path
) != 0)
626 if (unlink(path
) != 0)
627 /* Roll it next time */
630 if (acctctl(AC_PROC
| AC_FILE_SET
, path
, strlen(path
) + 1) == -1) {
631 zsd_warn(gettext("Unable to set accounting file"));
637 /* Contract stuff for zone_enter() */
644 fd
= open64(CTFS_ROOT
"/process/template", O_RDWR
);
649 * For now, zoneadmd doesn't do anything with the contract.
650 * Deliver no events, don't inherit, and allow it to be orphaned.
652 err
|= ct_tmpl_set_critical(fd
, 0);
653 err
|= ct_tmpl_set_informative(fd
, 0);
654 err
|= ct_pr_tmpl_set_fatal(fd
, CT_PR_EV_HWERR
);
655 err
|= ct_pr_tmpl_set_param(fd
, CT_PR_PGRPONLY
| CT_PR_REGENT
);
656 if (err
|| ct_tmpl_activate(fd
)) {
665 * Contract stuff for zone_enter()
668 contract_latest(ctid_t
*id
)
674 if ((cfd
= open64(CTFS_ROOT
"/process/latest", O_RDONLY
)) == -1)
677 if ((r
= ct_status_read(cfd
, CTD_COMMON
, &st
)) != 0) {
682 result
= ct_status_get_id(st
);
/*
 * Set FD_CLOEXEC on the given descriptor.  Returns 0 on success, -1 on
 * failure.
 */
static int
close_on_exec(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);

	if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != -1))
		return (0);
	return (-1);
}
700 contract_open(ctid_t ctid
, const char *type
, const char *file
, int oflag
)
708 n
= snprintf(path
, PATH_MAX
, CTFS_ROOT
"/%s/%ld/%s", type
, ctid
, file
);
709 if (n
>= sizeof (path
)) {
710 errno
= ENAMETOOLONG
;
714 fd
= open64(path
, oflag
);
716 if (close_on_exec(fd
) == -1) {
727 contract_abandon_id(ctid_t ctid
)
731 fd
= contract_open(ctid
, "all", "ctl", O_WRONLY
);
735 err
= ct_ctl_abandon(fd
);
741 * Attach the zsd_server to a zone. Called for each zone when zonestatd
742 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
744 * Zone_enter is used to avoid reaching into zone to fattach door.
747 zsd_fattach_zone(zoneid_t zid
, int door
, boolean_t detach_only
)
749 char *path
= ZS_DOOR_PATH
;
750 int fd
, pid
, stat
, tmpl_fd
;
753 if ((tmpl_fd
= init_template()) == -1) {
754 zsd_warn("Unable to init template");
760 (void) ct_tmpl_clear(tmpl_fd
);
762 "Unable to fork to add zonestat to zoneid %d\n"), zid
);
767 (void) ct_tmpl_clear(tmpl_fd
);
768 (void) close(tmpl_fd
);
769 if (zid
!= 0 && zone_enter(zid
) != 0) {
770 if (errno
== EINVAL
) {
775 (void) fdetach(path
);
779 fd
= open(path
, O_CREAT
|O_RDWR
, 0644);
782 if (fattach(door
, path
) != 0)
786 if (contract_latest(&ct
) == -1)
788 (void) ct_tmpl_clear(tmpl_fd
);
789 (void) close(tmpl_fd
);
790 (void) contract_abandon_id(ct
);
791 while (waitpid(pid
, &stat
, 0) != pid
)
793 if (WIFEXITED(stat
) && WEXITSTATUS(stat
) == 0)
796 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid
);
798 if (WEXITSTATUS(stat
) == 1)
799 zsd_warn(gettext("Cannot entering zone"));
800 else if (WEXITSTATUS(stat
) == 2)
801 zsd_warn(gettext("Unable to create door file: %s"), path
);
802 else if (WEXITSTATUS(stat
) == 3)
803 zsd_warn(gettext("Unable to fattach file: %s"), path
);
805 zsd_warn(gettext("Internal error entering zone: %d"), zid
);
809 * Zone lookup and allocation functions to manage list of currently running
813 zsd_lookup_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
817 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
818 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
819 if (strcmp(zone
->zsz_name
, zonename
) == 0) {
821 zone
->zsz_id
= zoneid
;
829 zsd_lookup_zone_byid(zsd_ctl_t
*ctl
, zoneid_t zoneid
)
833 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
834 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
835 if (zone
->zsz_id
== zoneid
)
842 zsd_allocate_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
846 if ((zone
= (zsd_zone_t
*)calloc(1, sizeof (zsd_zone_t
))) == NULL
)
849 (void) strlcpy(zone
->zsz_name
, zonename
, sizeof (zone
->zsz_name
));
850 zone
->zsz_id
= zoneid
;
851 zone
->zsz_found
= B_FALSE
;
854 * Allocate as deleted so if not found in first pass, zone is deleted
855 * from list. This can happen if zone is returned by zone_list, but
856 * exits before first attempt to fetch zone details.
858 zone
->zsz_start
= g_now
;
859 zone
->zsz_hrstart
= g_hrnow
;
860 zone
->zsz_deleted
= B_TRUE
;
862 zone
->zsz_cpu_shares
= ZS_LIMIT_NONE
;
863 zone
->zsz_cpu_cap
= ZS_LIMIT_NONE
;
864 zone
->zsz_ram_cap
= ZS_LIMIT_NONE
;
865 zone
->zsz_locked_cap
= ZS_LIMIT_NONE
;
866 zone
->zsz_vm_cap
= ZS_LIMIT_NONE
;
868 zone
->zsz_processes_cap
= ZS_LIMIT_NONE
;
869 zone
->zsz_lwps_cap
= ZS_LIMIT_NONE
;
870 zone
->zsz_shm_cap
= ZS_LIMIT_NONE
;
871 zone
->zsz_shmids_cap
= ZS_LIMIT_NONE
;
872 zone
->zsz_semids_cap
= ZS_LIMIT_NONE
;
873 zone
->zsz_msgids_cap
= ZS_LIMIT_NONE
;
874 zone
->zsz_lofi_cap
= ZS_LIMIT_NONE
;
882 zsd_lookup_insert_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
884 zsd_zone_t
*zone
, *tmp
;
886 if ((zone
= zsd_lookup_zone(ctl
, zonename
, zoneid
)) != NULL
)
889 if ((zone
= zsd_allocate_zone(ctl
, zonename
, zoneid
)) == NULL
)
892 /* Insert sorted by zonename */
893 tmp
= list_head(&ctl
->zsctl_zones
);
894 while (tmp
!= NULL
&& strcmp(zonename
, tmp
->zsz_name
) > 0)
895 tmp
= list_next(&ctl
->zsctl_zones
, tmp
);
897 list_insert_before(&ctl
->zsctl_zones
, tmp
, zone
);
902 * Mark all zones as not existing. As zones are found, they will
903 * be marked as existing. If a zone is not found, then it must have
907 zsd_mark_zones_start(zsd_ctl_t
*ctl
)
912 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
913 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
914 zone
->zsz_found
= B_FALSE
;
919 * Mark each zone as not using pset. If processes are found using the
920 * pset, the zone will remain bound to the pset. If none of a zones
921 * processes are bound to the pset, the zone's usage of the pset will
926 zsd_mark_pset_usage_start(zsd_pset_t
*pset
)
928 zsd_pset_usage_t
*usage
;
930 for (usage
= list_head(&pset
->zsp_usage_list
);
932 usage
= list_next(&pset
->zsp_usage_list
, usage
)) {
933 usage
->zsu_found
= B_FALSE
;
934 usage
->zsu_empty
= B_TRUE
;
939 * Mark each pset as not existing. If a pset is found, it will be marked
940 * as existing. If a pset is not found, it wil be deleted.
943 zsd_mark_psets_start(zsd_ctl_t
*ctl
)
947 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
948 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
949 pset
->zsp_found
= B_FALSE
;
950 zsd_mark_pset_usage_start(pset
);
955 * A pset was found. Update its information
958 zsd_mark_pset_found(zsd_pset_t
*pset
, uint_t type
, uint64_t online
,
959 uint64_t size
, uint64_t min
, uint64_t max
, int64_t importance
)
961 pset
->zsp_empty
= B_TRUE
;
962 pset
->zsp_deleted
= B_FALSE
;
964 assert(pset
->zsp_found
== B_FALSE
);
966 /* update pset flags */
967 if (pset
->zsp_active
== B_FALSE
)
968 /* pset not seen on previous interval. It is new. */
969 pset
->zsp_new
= B_TRUE
;
971 pset
->zsp_new
= B_FALSE
;
973 pset
->zsp_found
= B_TRUE
;
974 pset
->zsp_cputype
= type
;
975 pset
->zsp_online
= online
;
976 pset
->zsp_size
= size
;
979 pset
->zsp_importance
= importance
;
980 pset
->zsp_cpu_shares
= 0;
981 pset
->zsp_scheds
= 0;
982 pset
->zsp_active
= B_TRUE
;
986 * A zone's process was found using a pset. Charge the process to the pset and
987 * the per-zone data for the pset.
990 zsd_mark_pset_usage_found(zsd_pset_usage_t
*usage
, uint_t sched
)
992 zsd_zone_t
*zone
= usage
->zsu_zone
;
993 zsd_pset_t
*pset
= usage
->zsu_pset
;
995 /* Nothing to do if already found */
996 if (usage
->zsu_found
== B_TRUE
)
999 usage
->zsu_found
= B_TRUE
;
1000 usage
->zsu_empty
= B_FALSE
;
1002 usage
->zsu_deleted
= B_FALSE
;
1003 /* update usage flags */
1004 if (usage
->zsu_active
== B_FALSE
)
1005 usage
->zsu_new
= B_TRUE
;
1007 usage
->zsu_new
= B_FALSE
;
1009 usage
->zsu_scheds
= 0;
1010 usage
->zsu_cpu_shares
= ZS_LIMIT_NONE
;
1011 usage
->zsu_active
= B_TRUE
;
1012 pset
->zsp_empty
= B_FALSE
;
1013 zone
->zsz_empty
= B_FALSE
;
1016 /* Detect zone's pset id, and if it is bound to multiple psets */
1017 if (zone
->zsz_psetid
== ZS_PSET_ERROR
)
1018 zone
->zsz_psetid
= pset
->zsp_id
;
1019 else if (zone
->zsz_psetid
!= pset
->zsp_id
)
1020 zone
->zsz_psetid
= ZS_PSET_MULTI
;
1022 usage
->zsu_scheds
|= sched
;
1023 pset
->zsp_scheds
|= sched
;
1024 zone
->zsz_scheds
|= sched
;
1026 /* Record if FSS is co-habitating with conflicting scheduler */
1027 if ((pset
->zsp_scheds
& ZS_SCHED_FSS
) &&
1028 usage
->zsu_scheds
& (
1029 ZS_SCHED_TS
| ZS_SCHED_IA
| ZS_SCHED_FX
)) {
1030 usage
->zsu_scheds
|= ZS_SCHED_CONFLICT
;
1032 pset
->zsp_scheds
|= ZS_SCHED_CONFLICT
;
1037 /* Add cpu time for a process to a pset, zone, and system totals */
1039 zsd_add_usage(zsd_ctl_t
*ctl
, zsd_pset_usage_t
*usage
, timestruc_t
*delta
)
1041 zsd_system_t
*system
= ctl
->zsctl_system
;
1042 zsd_zone_t
*zone
= usage
->zsu_zone
;
1043 zsd_pset_t
*pset
= usage
->zsu_pset
;
1045 TIMESTRUC_ADD_TIMESTRUC(usage
->zsu_cpu_usage
, *delta
);
1046 TIMESTRUC_ADD_TIMESTRUC(pset
->zsp_usage_zones
, *delta
);
1047 TIMESTRUC_ADD_TIMESTRUC(zone
->zsz_cpu_usage
, *delta
);
1048 TIMESTRUC_ADD_TIMESTRUC(system
->zss_cpu_usage_zones
, *delta
);
1051 /* Determine which processor sets have been deleted */
1053 zsd_mark_psets_end(zsd_ctl_t
*ctl
)
1055 zsd_pset_t
*pset
, *tmp
;
1058 * Mark pset as not exists, and deleted if it existed
1059 * previous interval.
1061 pset
= list_head(&ctl
->zsctl_psets
);
1062 while (pset
!= NULL
) {
1063 if (pset
->zsp_found
== B_FALSE
) {
1064 pset
->zsp_empty
= B_TRUE
;
1065 if (pset
->zsp_deleted
== B_TRUE
) {
1067 pset
= list_next(&ctl
->zsctl_psets
, pset
);
1068 list_remove(&ctl
->zsctl_psets
, tmp
);
1070 ctl
->zsctl_npsets
--;
1073 /* Pset vanished during this interval */
1074 pset
->zsp_new
= B_FALSE
;
1075 pset
->zsp_deleted
= B_TRUE
;
1076 pset
->zsp_active
= B_TRUE
;
1079 pset
= list_next(&ctl
->zsctl_psets
, pset
);
1083 /* Determine which zones are no longer bound to processor sets */
1085 zsd_mark_pset_usages_end(zsd_ctl_t
*ctl
)
1089 zsd_pset_usage_t
*usage
, *tmp
;
1092 * Mark pset as not exists, and deleted if it existed previous
1095 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1096 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1097 usage
= list_head(&pset
->zsp_usage_list
);
1098 while (usage
!= NULL
) {
1100 * Mark pset as not exists, and deleted if it existed
1101 * previous interval.
1103 if (usage
->zsu_found
== B_FALSE
||
1104 usage
->zsu_zone
->zsz_deleted
== B_TRUE
||
1105 usage
->zsu_pset
->zsp_deleted
== B_TRUE
) {
1107 usage
= list_next(&pset
->zsp_usage_list
,
1109 list_remove(&pset
->zsp_usage_list
, tmp
);
1112 ctl
->zsctl_npset_usages
--;
1115 usage
->zsu_new
= B_FALSE
;
1116 usage
->zsu_deleted
= B_TRUE
;
1117 usage
->zsu_active
= B_TRUE
;
1119 /* Add cpu shares for usages that are in FSS */
1120 zone
= usage
->zsu_zone
;
1121 if (usage
->zsu_scheds
& ZS_SCHED_FSS
&&
1122 zone
->zsz_cpu_shares
!= ZS_SHARES_UNLIMITED
&&
1123 zone
->zsz_cpu_shares
!= 0) {
1124 zone
= usage
->zsu_zone
;
1125 usage
->zsu_cpu_shares
= zone
->zsz_cpu_shares
;
1126 pset
->zsp_cpu_shares
+= zone
->zsz_cpu_shares
;
1128 usage
= list_next(&pset
->zsp_usage_list
,
1134 /* A zone has been found. Update its information */
1136 zsd_mark_zone_found(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
, uint64_t cpu_shares
,
1137 uint64_t cpu_cap
, uint64_t ram_cap
, uint64_t locked_cap
,
1138 uint64_t vm_cap
, uint64_t processes_cap
, uint64_t processes
,
1139 uint64_t lwps_cap
, uint64_t lwps
, uint64_t shm_cap
, uint64_t shm
,
1140 uint64_t shmids_cap
, uint64_t shmids
, uint64_t semids_cap
,
1141 uint64_t semids
, uint64_t msgids_cap
, uint64_t msgids
, uint64_t lofi_cap
,
1142 uint64_t lofi
, char *poolname
, char *psetname
, uint_t sched
, uint_t cputype
,
1145 zsd_system_t
*sys
= ctl
->zsctl_system
;
1147 assert(zone
->zsz_found
== B_FALSE
);
1150 * Mark zone as exists, and new if it did not exist in previous
1153 zone
->zsz_found
= B_TRUE
;
1154 zone
->zsz_empty
= B_TRUE
;
1155 zone
->zsz_deleted
= B_FALSE
;
1158 * Zone is new. Assume zone's properties are the same over entire
1161 if (zone
->zsz_active
== B_FALSE
)
1162 zone
->zsz_new
= B_TRUE
;
1164 zone
->zsz_new
= B_FALSE
;
1166 (void) strlcpy(zone
->zsz_pool
, poolname
, sizeof (zone
->zsz_pool
));
1167 (void) strlcpy(zone
->zsz_pset
, psetname
, sizeof (zone
->zsz_pset
));
1168 zone
->zsz_default_sched
= sched
;
1170 /* Schedulers updated later as processes are found */
1171 zone
->zsz_scheds
= 0;
1173 /* Cpus updated later as psets bound are identified */
1174 zone
->zsz_cpus_online
= 0;
1176 zone
->zsz_cputype
= cputype
;
1177 zone
->zsz_iptype
= iptype
;
1178 zone
->zsz_psetid
= ZS_PSET_ERROR
;
1179 zone
->zsz_cpu_cap
= cpu_cap
;
1180 zone
->zsz_cpu_shares
= cpu_shares
;
1181 zone
->zsz_ram_cap
= ram_cap
;
1182 zone
->zsz_locked_cap
= locked_cap
;
1183 zone
->zsz_vm_cap
= vm_cap
;
1184 zone
->zsz_processes_cap
= processes_cap
;
1185 zone
->zsz_processes
= processes
;
1186 zone
->zsz_lwps_cap
= lwps_cap
;
1187 zone
->zsz_lwps
= lwps
;
1188 zone
->zsz_shm_cap
= shm_cap
;
1189 zone
->zsz_shm
= shm
;
1190 zone
->zsz_shmids_cap
= shmids_cap
;
1191 zone
->zsz_shmids
= shmids
;
1192 zone
->zsz_semids_cap
= semids_cap
;
1193 zone
->zsz_semids
= semids
;
1194 zone
->zsz_msgids_cap
= msgids_cap
;
1195 zone
->zsz_msgids
= msgids
;
1196 zone
->zsz_lofi_cap
= lofi_cap
;
1197 zone
->zsz_lofi
= lofi
;
1199 sys
->zss_processes
+= processes
;
1200 sys
->zss_lwps
+= lwps
;
1201 sys
->zss_shm
+= shm
;
1202 sys
->zss_shmids
+= shmids
;
1203 sys
->zss_semids
+= semids
;
1204 sys
->zss_msgids
+= msgids
;
1205 sys
->zss_lofi
+= lofi
;
1206 zone
->zsz_active
= B_TRUE
;
1210 /* Determine which zones have halted */
1212 zsd_mark_zones_end(zsd_ctl_t
*ctl
)
1214 zsd_zone_t
*zone
, *tmp
;
1217 * Mark zone as not existing, or delete if it did not exist in
1218 * previous interval.
1220 zone
= list_head(&ctl
->zsctl_zones
);
1221 while (zone
!= NULL
) {
1222 if (zone
->zsz_found
== B_FALSE
) {
1223 zone
->zsz_empty
= B_TRUE
;
1224 if (zone
->zsz_deleted
== B_TRUE
) {
1226 * Zone deleted in prior interval,
1227 * so it no longer exists.
1230 zone
= list_next(&ctl
->zsctl_zones
, zone
);
1231 list_remove(&ctl
->zsctl_zones
, tmp
);
1233 ctl
->zsctl_nzones
--;
1236 zone
->zsz_new
= B_FALSE
;
1237 zone
->zsz_deleted
= B_TRUE
;
1238 zone
->zsz_active
= B_TRUE
;
1241 zone
= list_next(&ctl
->zsctl_zones
, zone
);
1246 * Mark cpus as not existing. If a cpu is found, it will be updated. If
1247 * a cpu is not found, then it must have gone offline, so it will be
1250 * The kstat tracking data is rolled so that the usage since the previous
1251 * interval can be determined.
1254 zsd_mark_cpus_start(zsd_ctl_t
*ctl
, boolean_t roll
)
1259 * Mark all cpus as not existing. As cpus are found, they will
1260 * be marked as existing.
1262 for (cpu
= list_head(&ctl
->zsctl_cpus
); cpu
!= NULL
;
1263 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
)) {
1264 cpu
->zsc_found
= B_FALSE
;
1265 if (cpu
->zsc_active
== B_TRUE
&& roll
) {
1266 cpu
->zsc_psetid_prev
= cpu
->zsc_psetid
;
1267 cpu
->zsc_nsec_idle_prev
= cpu
->zsc_nsec_idle
;
1268 cpu
->zsc_nsec_intr_prev
= cpu
->zsc_nsec_intr
;
1269 cpu
->zsc_nsec_kern_prev
= cpu
->zsc_nsec_kern
;
1270 cpu
->zsc_nsec_user_prev
= cpu
->zsc_nsec_user
;
1276 * An array the size of the maximum number of cpus is kept. Within this array
1277 * a list of the online cpus is maintained.
1280 zsd_lookup_insert_cpu(zsd_ctl_t
*ctl
, processorid_t cpuid
)
1284 assert(cpuid
< ctl
->zsctl_maxcpuid
);
1285 cpu
= &(ctl
->zsctl_cpu_array
[cpuid
]);
1286 assert(cpuid
== cpu
->zsc_id
);
1288 if (cpu
->zsc_allocated
== B_FALSE
) {
1289 cpu
->zsc_allocated
= B_TRUE
;
1290 list_insert_tail(&ctl
->zsctl_cpus
, cpu
);
1295 /* A cpu has been found. Update its information */
1297 zsd_mark_cpu_found(zsd_cpu_t
*cpu
, zsd_pset_t
*pset
, psetid_t psetid
)
1300 * legacy processor sets, the cpu may move while zonestatd is
1301 * inspecting, causing it to be found twice. In this case, just
1302 * leave cpu in the first processor set in which it was found.
1304 if (cpu
->zsc_found
== B_TRUE
)
1307 /* Mark cpu as online */
1308 cpu
->zsc_found
= B_TRUE
;
1309 cpu
->zsc_offlined
= B_FALSE
;
1310 cpu
->zsc_pset
= pset
;
1312 * cpu is newly online.
1314 if (cpu
->zsc_active
== B_FALSE
) {
1316 * Cpu is newly online.
1318 cpu
->zsc_onlined
= B_TRUE
;
1319 cpu
->zsc_psetid
= psetid
;
1320 cpu
->zsc_psetid_prev
= psetid
;
1323 * cpu online during previous interval. Save properties at
1326 cpu
->zsc_onlined
= B_FALSE
;
1327 cpu
->zsc_psetid
= psetid
;
1330 cpu
->zsc_active
= B_TRUE
;
1333 /* Remove all offlined cpus from the list of tracked cpus */
1335 zsd_mark_cpus_end(zsd_ctl_t
*ctl
)
1337 zsd_cpu_t
*cpu
, *tmp
;
1340 /* Mark cpu as online or offline */
1341 cpu
= list_head(&ctl
->zsctl_cpus
);
1342 while (cpu
!= NULL
) {
1343 if (cpu
->zsc_found
== B_FALSE
) {
1344 if (cpu
->zsc_offlined
== B_TRUE
) {
1346 * cpu offlined in prior interval. It is gone.
1349 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
);
1350 list_remove(&ctl
->zsctl_cpus
, tmp
);
1351 /* Clear structure for future use */
1353 bzero(tmp
, sizeof (zsd_cpu_t
));
1355 tmp
->zsc_allocated
= B_FALSE
;
1356 tmp
->zsc_psetid
= ZS_PSET_ERROR
;
1357 tmp
->zsc_psetid_prev
= ZS_PSET_ERROR
;
1361 * cpu online at start of interval. Treat
1362 * as still online, since it was online for
1363 * some portion of the interval.
1365 cpu
->zsc_offlined
= B_TRUE
;
1366 cpu
->zsc_onlined
= B_FALSE
;
1367 cpu
->zsc_active
= B_TRUE
;
1368 cpu
->zsc_psetid
= cpu
->zsc_psetid_prev
;
1369 cpu
->zsc_pset
= NULL
;
1372 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
);
1376 /* Some utility functions for managing the list of processor sets */
1378 zsd_lookup_pset_byid(zsd_ctl_t
*ctl
, psetid_t psetid
)
1382 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1383 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1384 if (pset
->zsp_id
== psetid
)
1391 zsd_lookup_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1395 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1396 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1397 if (strcmp(pset
->zsp_name
, psetname
) == 0) {
1399 pset
->zsp_id
= psetid
;
1407 zsd_allocate_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1411 if ((pset
= (zsd_pset_t
*)calloc(1, sizeof (zsd_pset_t
))) == NULL
)
1414 (void) strlcpy(pset
->zsp_name
, psetname
, sizeof (pset
->zsp_name
));
1415 pset
->zsp_id
= psetid
;
1416 pset
->zsp_found
= B_FALSE
;
1418 * Allocate as deleted so if not found in first pass, pset is deleted
1419 * from list. This can happen if pset is returned by pset_list, but
1420 * is destroyed before first attempt to fetch pset details.
1422 list_create(&pset
->zsp_usage_list
, sizeof (zsd_pset_usage_t
),
1423 offsetof(zsd_pset_usage_t
, zsu_next
));
1425 pset
->zsp_hrstart
= g_hrnow
;
1426 pset
->zsp_deleted
= B_TRUE
;
1427 pset
->zsp_empty
= B_TRUE
;
1428 ctl
->zsctl_npsets
++;
1434 zsd_lookup_insert_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1436 zsd_pset_t
*pset
, *tmp
;
1438 if ((pset
= zsd_lookup_pset(ctl
, psetname
, psetid
)) != NULL
)
1441 if ((pset
= zsd_allocate_pset(ctl
, psetname
, psetid
)) == NULL
)
1444 /* Insert sorted by psetname */
1445 tmp
= list_head(&ctl
->zsctl_psets
);
1446 while (tmp
!= NULL
&& strcmp(psetname
, tmp
->zsp_name
) > 0)
1447 tmp
= list_next(&ctl
->zsctl_psets
, tmp
);
1449 list_insert_before(&ctl
->zsctl_psets
, tmp
, pset
);
1453 /* Some utility functions for managing the list of zones using each pset */
1454 static zsd_pset_usage_t
*
1455 zsd_lookup_usage(zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1457 zsd_pset_usage_t
*usage
;
1459 for (usage
= list_head(&pset
->zsp_usage_list
); usage
!= NULL
;
1460 usage
= list_next(&pset
->zsp_usage_list
, usage
))
1461 if (usage
->zsu_zone
== zone
)
1467 static zsd_pset_usage_t
*
1468 zsd_allocate_pset_usage(zsd_ctl_t
*ctl
, zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1470 zsd_pset_usage_t
*usage
;
1472 if ((usage
= (zsd_pset_usage_t
*)calloc(1, sizeof (zsd_pset_usage_t
)))
1476 list_link_init(&usage
->zsu_next
);
1477 usage
->zsu_zone
= zone
;
1478 usage
->zsu_zoneid
= zone
->zsz_id
;
1479 usage
->zsu_pset
= pset
;
1480 usage
->zsu_found
= B_FALSE
;
1481 usage
->zsu_active
= B_FALSE
;
1482 usage
->zsu_new
= B_FALSE
;
1484 * Allocate as not deleted. If a process is found in a pset for
1485 * a zone, the usage will not be deleted until at least the next
1488 usage
->zsu_start
= g_now
;
1489 usage
->zsu_hrstart
= g_hrnow
;
1490 usage
->zsu_deleted
= B_FALSE
;
1491 usage
->zsu_empty
= B_TRUE
;
1492 usage
->zsu_scheds
= 0;
1493 usage
->zsu_cpu_shares
= ZS_LIMIT_NONE
;
1495 ctl
->zsctl_npset_usages
++;
1501 static zsd_pset_usage_t
*
1502 zsd_lookup_insert_usage(zsd_ctl_t
*ctl
, zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1504 zsd_pset_usage_t
*usage
, *tmp
;
1506 if ((usage
= zsd_lookup_usage(pset
, zone
))
1510 if ((usage
= zsd_allocate_pset_usage(ctl
, pset
, zone
)) == NULL
)
1513 tmp
= list_head(&pset
->zsp_usage_list
);
1514 while (tmp
!= NULL
&& strcmp(zone
->zsz_name
, tmp
->zsu_zone
->zsz_name
)
1516 tmp
= list_next(&pset
->zsp_usage_list
, tmp
);
1518 list_insert_before(&pset
->zsp_usage_list
, tmp
, usage
);
1523 zsd_refresh_system(zsd_ctl_t
*ctl
)
1525 zsd_system_t
*system
= ctl
->zsctl_system
;
1527 /* Re-count these values each interval */
1528 system
->zss_processes
= 0;
1529 system
->zss_lwps
= 0;
1530 system
->zss_shm
= 0;
1531 system
->zss_shmids
= 0;
1532 system
->zss_semids
= 0;
1533 system
->zss_msgids
= 0;
1534 system
->zss_lofi
= 0;
1538 /* Reads each cpu's kstats, and adds the usage to the cpu's pset */
1540 zsd_update_cpu_stats(zsd_ctl_t
*ctl
, zsd_cpu_t
*cpu
)
1543 processorid_t cpuid
;
1544 zsd_pset_t
*pset_prev
;
1549 uint64_t idle
, intr
, kern
, user
;
1551 sys
= ctl
->zsctl_system
;
1552 pset
= cpu
->zsc_pset
;
1555 cpuid
= cpu
->zsc_id
;
1557 /* Get the cpu time totals for this cpu */
1558 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "cpu", cpuid
, "sys");
1562 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
1566 knp
= kstat_data_lookup(kstat
, "cpu_nsec_idle");
1567 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1570 idle
= knp
->value
.ui64
;
1572 knp
= kstat_data_lookup(kstat
, "cpu_nsec_kernel");
1573 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1576 kern
= knp
->value
.ui64
;
1578 knp
= kstat_data_lookup(kstat
, "cpu_nsec_user");
1579 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1582 user
= knp
->value
.ui64
;
1585 * Tracking intr time per cpu just exists for future enhancements.
1586 * The value is presently always zero.
1589 cpu
->zsc_nsec_idle
= idle
;
1590 cpu
->zsc_nsec_intr
= intr
;
1591 cpu
->zsc_nsec_kern
= kern
;
1592 cpu
->zsc_nsec_user
= user
;
1594 if (cpu
->zsc_onlined
== B_TRUE
) {
1596 * cpu is newly online. There is no reference value,
1597 * so just record its current stats for comparison
1598 * on next stat read.
1600 cpu
->zsc_nsec_idle_prev
= cpu
->zsc_nsec_idle
;
1601 cpu
->zsc_nsec_intr_prev
= cpu
->zsc_nsec_intr
;
1602 cpu
->zsc_nsec_kern_prev
= cpu
->zsc_nsec_kern
;
1603 cpu
->zsc_nsec_user_prev
= cpu
->zsc_nsec_user
;
1608 * Calculate relative time since previous refresh.
1609 * Paranoia. Don't let time go backwards.
1611 idle
= intr
= kern
= user
= 0;
1612 if (cpu
->zsc_nsec_idle
> cpu
->zsc_nsec_idle_prev
)
1613 idle
= cpu
->zsc_nsec_idle
- cpu
->zsc_nsec_idle_prev
;
1615 if (cpu
->zsc_nsec_intr
> cpu
->zsc_nsec_intr_prev
)
1616 intr
= cpu
->zsc_nsec_intr
- cpu
->zsc_nsec_intr_prev
;
1618 if (cpu
->zsc_nsec_kern
> cpu
->zsc_nsec_kern_prev
)
1619 kern
= cpu
->zsc_nsec_kern
- cpu
->zsc_nsec_kern_prev
;
1621 if (cpu
->zsc_nsec_user
> cpu
->zsc_nsec_user_prev
)
1622 user
= cpu
->zsc_nsec_user
- cpu
->zsc_nsec_user_prev
;
1624 /* Update totals for cpu usage */
1625 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_idle
, idle
);
1626 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_intr
, intr
);
1627 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_kern
, kern
);
1628 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_user
, user
);
1631 * Add cpu's stats to its pset if it is known to be in
1632 * the pset since previous read.
1634 if (cpu
->zsc_psetid
== cpu
->zsc_psetid_prev
||
1635 cpu
->zsc_psetid_prev
== ZS_PSET_ERROR
||
1636 (pset_prev
= zsd_lookup_pset_byid(ctl
,
1637 cpu
->zsc_psetid_prev
)) == NULL
) {
1638 TIMESTRUC_ADD_NANOSEC(pset
->zsp_idle
, idle
);
1639 TIMESTRUC_ADD_NANOSEC(pset
->zsp_intr
, intr
);
1640 TIMESTRUC_ADD_NANOSEC(pset
->zsp_kern
, kern
);
1641 TIMESTRUC_ADD_NANOSEC(pset
->zsp_user
, user
);
1644 * Last pset was different than current pset.
1645 * Best guess is to split usage between the two.
1647 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_idle
, idle
/ 2);
1648 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_intr
, intr
/ 2);
1649 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_kern
, kern
/ 2);
1650 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_user
, user
/ 2);
1652 TIMESTRUC_ADD_NANOSEC(pset
->zsp_idle
,
1653 (idle
/ 2) + (idle
% 2));
1654 TIMESTRUC_ADD_NANOSEC(pset
->zsp_intr
,
1655 (intr
/ 2) + (intr
% 2));
1656 TIMESTRUC_ADD_NANOSEC(pset
->zsp_kern
,
1657 (kern
/ 2) + (kern
% 2));
1658 TIMESTRUC_ADD_NANOSEC(pset
->zsp_user
,
1659 (user
/ 2) + (user
% 2));
1661 TIMESTRUC_ADD_NANOSEC(sys
->zss_idle
, idle
);
1662 TIMESTRUC_ADD_NANOSEC(sys
->zss_intr
, intr
);
1663 TIMESTRUC_ADD_NANOSEC(sys
->zss_kern
, kern
);
1664 TIMESTRUC_ADD_NANOSEC(sys
->zss_user
, user
);
1667 /* Determine the details of a processor set by pset_id */
1669 zsd_get_pool_pset(zsd_ctl_t
*ctl
, psetid_t psetid
, char *psetname
,
1670 size_t namelen
, uint_t
*cputype
, uint64_t *online
, uint64_t *size
,
1671 uint64_t *min
, uint64_t *max
, int64_t *importance
)
1675 pool_conf_t
*conf
= ctl
->zsctl_pool_conf
;
1676 pool_value_t
**vals
= ctl
->zsctl_pool_vals
;
1677 pool_resource_t
**res_list
= NULL
;
1678 pool_resource_t
*pset
;
1679 pool_component_t
**cpus
= NULL
;
1680 processorid_t
*cache
;
1686 if (ctl
->zsctl_pool_status
== POOL_DISABLED
) {
1689 * Inspect legacy psets
1692 old
= num
= ctl
->zsctl_cpu_ncache
;
1693 ret
= pset_info(psetid
, &type
, &num
,
1694 ctl
->zsctl_cpu_cache
);
1696 /* pset is gone. Tell caller to retry */
1704 if ((cache
= (processorid_t
*)realloc(
1705 ctl
->zsctl_cpu_cache
, num
*
1706 sizeof (processorid_t
))) != NULL
) {
1707 ctl
->zsctl_cpu_ncache
= num
;
1708 ctl
->zsctl_cpu_cache
= cache
;
1711 * Could not allocate to get new cpu list.
1714 "Could not allocate for cpu list"));
1720 * Old school pset. Just make min and max equal
1723 if (psetid
== ZS_PSET_DEFAULT
) {
1724 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
1725 (void) strlcpy(psetname
, "pset_default", namelen
);
1727 *cputype
= ZS_CPUTYPE_PSRSET_PSET
;
1728 (void) snprintf(psetname
, namelen
,
1729 "SUNWlegacy_pset_%d", psetid
);
1733 * Just treat legacy pset as a simple pool pset
1744 /* Look up the pool pset using the pset id */
1746 pool_value_set_int64(vals
[1], psetid
);
1747 if (pool_value_set_name(vals
[1], "pset.sys_id")
1751 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
1753 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
1755 if ((res_list
= pool_query_resources(conf
, &num
, vals
)) == NULL
)
1762 if (pool_get_property(conf
, pool_resource_to_elem(conf
, pset
),
1763 "pset.name", vals
[0]) != POC_STRING
||
1764 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
1767 (void) strlcpy(psetname
, string
, namelen
);
1768 if (strncmp(psetname
, "SUNWtmp", strlen("SUNWtmp")) == 0)
1769 *cputype
= ZS_CPUTYPE_DEDICATED
;
1770 else if (psetid
== ZS_PSET_DEFAULT
)
1771 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
1773 *cputype
= ZS_CPUTYPE_POOL_PSET
;
1775 /* Get size, min, max, and importance */
1776 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1777 pset
), "pset.size", vals
[0]) == POC_UINT
&&
1778 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1783 /* Get size, min, max, and importance */
1784 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1785 pset
), "pset.min", vals
[0]) == POC_UINT
&&
1786 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1790 if (*min
>= ZSD_PSET_UNLIMITED
)
1791 *min
= ZS_LIMIT_NONE
;
1793 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1794 pset
), "pset.max", vals
[0]) == POC_UINT
&&
1795 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1798 *max
= ZS_LIMIT_NONE
;
1800 if (*max
>= ZSD_PSET_UNLIMITED
)
1801 *max
= ZS_LIMIT_NONE
;
1803 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1804 pset
), "pset.importance", vals
[0]) == POC_INT
&&
1805 pool_value_get_int64(vals
[0], &int64
) == PO_SUCCESS
)
1806 *importance
= int64
;
1808 *importance
= (uint64_t)1;
1815 cpus
= pool_query_resource_components(conf
, pset
, &num
, NULL
);
1819 /* Make sure there is space for cpu id list */
1820 if (num
> ctl
->zsctl_cpu_ncache
) {
1821 if ((cache
= (processorid_t
*)realloc(
1822 ctl
->zsctl_cpu_cache
, num
*
1823 sizeof (processorid_t
))) != NULL
) {
1824 ctl
->zsctl_cpu_ncache
= num
;
1825 ctl
->zsctl_cpu_cache
= cache
;
1828 * Could not allocate to get new cpu list.
1831 "Could not allocate for cpu list"));
1836 /* count the online cpus */
1837 for (i
= 0; i
< num
; i
++) {
1838 if (pool_get_property(conf
, pool_component_to_elem(
1839 conf
, cpus
[i
]), "cpu.status", vals
[0]) != POC_STRING
||
1840 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
1843 if (strcmp(string
, "on-line") != 0 &&
1844 strcmp(string
, "no-intr") != 0)
1847 if (pool_get_property(conf
, pool_component_to_elem(
1848 conf
, cpus
[i
]), "cpu.sys_id", vals
[0]) != POC_INT
||
1849 pool_value_get_int64(vals
[0], &int64
) != PO_SUCCESS
)
1853 ctl
->zsctl_cpu_cache
[i
] = (psetid_t
)int64
;
1862 * The pools operations should succeed since the conf is a consistent
1863 * snapshot. Tell caller there is no need to retry.
1870 * Update the current list of processor sets.
1871 * This also updates the list of online cpus, and each cpu's pset membership.
1874 zsd_refresh_psets(zsd_ctl_t
*ctl
)
1876 int i
, j
, ret
, state
;
1879 int64_t sys_id
, importance
;
1880 uint64_t online
, size
, min
, max
;
1881 zsd_system_t
*system
;
1885 char psetname
[ZS_PSETNAME_MAX
];
1886 processorid_t cpuid
;
1887 pool_value_t
*pv_save
= NULL
;
1888 pool_resource_t
**res_list
= NULL
;
1889 pool_resource_t
*res
;
1890 pool_value_t
**vals
;
1892 boolean_t roll_cpus
= B_TRUE
;
1894 /* Zero cpu counters to recount them */
1895 system
= ctl
->zsctl_system
;
1896 system
->zss_ncpus
= 0;
1897 system
->zss_ncpus_online
= 0;
1899 ret
= pool_get_status(&state
);
1900 if (ret
== 0 && state
== POOL_ENABLED
) {
1902 conf
= ctl
->zsctl_pool_conf
;
1903 vals
= ctl
->zsctl_pool_vals
;
1907 if (ctl
->zsctl_pool_status
== POOL_DISABLED
) {
1908 if (pool_conf_open(ctl
->zsctl_pool_conf
,
1909 pool_dynamic_location(), PO_RDONLY
) == 0) {
1910 ctl
->zsctl_pool_status
= POOL_ENABLED
;
1911 ctl
->zsctl_pool_changed
= POU_PSET
;
1914 ctl
->zsctl_pool_changed
= 0;
1915 ret
= pool_conf_update(ctl
->zsctl_pool_conf
,
1916 &(ctl
->zsctl_pool_changed
));
1918 /* Pools must have become disabled */
1919 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
1920 ctl
->zsctl_pool_status
= POOL_DISABLED
;
1921 if (pool_error() == POE_SYSTEM
&& errno
==
1926 "Unable to update pool configuration"));
1927 /* Not able to get pool info. Don't update. */
1931 /* Get the list of psets using libpool */
1932 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
1935 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
1937 if ((res_list
= pool_query_resources(conf
, &num
, vals
))
1941 if (num
> ctl
->zsctl_pset_ncache
) {
1942 if ((cache
= (psetid_t
*)realloc(ctl
->zsctl_pset_cache
,
1943 (num
) * sizeof (psetid_t
))) == NULL
) {
1946 ctl
->zsctl_pset_ncache
= num
;
1947 ctl
->zsctl_pset_cache
= cache
;
1949 /* Save the pset id of each pset */
1950 for (i
= 0; i
< num
; i
++) {
1952 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1953 res
), "pset.sys_id", vals
[0]) != POC_INT
||
1954 pool_value_get_int64(vals
[0], &sys_id
)
1957 ctl
->zsctl_pset_cache
[i
] = (int)sys_id
;
1962 if (ctl
->zsctl_pool_status
== POOL_ENABLED
) {
1963 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
1964 ctl
->zsctl_pool_status
= POOL_DISABLED
;
1966 /* Get the pset list using legacy psets */
1968 old
= num
= ctl
->zsctl_pset_ncache
;
1969 (void) pset_list(ctl
->zsctl_pset_cache
, &num
);
1970 if ((num
+ 1) <= old
) {
1973 if ((cache
= (psetid_t
*)realloc(ctl
->zsctl_pset_cache
,
1974 (num
+ 1) * sizeof (psetid_t
))) != NULL
) {
1975 ctl
->zsctl_pset_ncache
= num
+ 1;
1976 ctl
->zsctl_pset_cache
= cache
;
1979 * Could not allocate to get new pset list.
1985 /* Add the default pset to list */
1986 ctl
->zsctl_pset_cache
[num
] = ctl
->zsctl_pset_cache
[0];
1987 ctl
->zsctl_pset_cache
[0] = ZS_PSET_DEFAULT
;
1991 zsd_mark_cpus_start(ctl
, roll_cpus
);
1992 zsd_mark_psets_start(ctl
);
1993 roll_cpus
= B_FALSE
;
1995 /* Refresh cpu membership of all psets */
1996 for (i
= 0; i
< num
; i
++) {
1998 /* Get pool pset information */
1999 sys_id
= ctl
->zsctl_pset_cache
[i
];
2000 if (zsd_get_pool_pset(ctl
, sys_id
, psetname
, sizeof (psetname
),
2001 &cputype
, &online
, &size
, &min
, &max
, &importance
)
2005 zsd_warn(gettext("Failed to get info for pset %d"),
2010 system
->zss_ncpus
+= size
;
2011 system
->zss_ncpus_online
+= online
;
2013 pset
= zsd_lookup_insert_pset(ctl
, psetname
,
2014 ctl
->zsctl_pset_cache
[i
]);
2016 /* update pset info */
2017 zsd_mark_pset_found(pset
, cputype
, online
, size
, min
,
2020 /* update each cpu in pset */
2021 for (j
= 0; j
< pset
->zsp_online
; j
++) {
2022 cpuid
= ctl
->zsctl_cpu_cache
[j
];
2023 cpu
= zsd_lookup_insert_cpu(ctl
, cpuid
);
2024 zsd_mark_cpu_found(cpu
, pset
, sys_id
);
2029 if (pv_save
!= NULL
)
2036 * Fetch the current pool and pset name for the given zone.
2039 zsd_get_zone_pool_pset(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
,
2040 char *pool
, int poollen
, char *pset
, int psetlen
, uint_t
*cputype
)
2043 pool_t
**pools
= NULL
;
2044 pool_resource_t
**res_list
= NULL
;
2045 char poolname
[ZS_POOLNAME_MAX
];
2046 char psetname
[ZS_PSETNAME_MAX
];
2047 pool_conf_t
*conf
= ctl
->zsctl_pool_conf
;
2048 pool_value_t
*pv_save
= NULL
;
2049 pool_value_t
**vals
= ctl
->zsctl_pool_vals
;
2055 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_POOLID
,
2056 &poolid
, sizeof (poolid
));
2065 /* Default values if lookup fails */
2066 (void) strlcpy(poolname
, "pool_default", sizeof (poolname
));
2067 (void) strlcpy(psetname
, "pset_default", sizeof (poolname
));
2068 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
2070 /* no dedicated cpu if pools are disabled */
2071 if (ctl
->zsctl_pool_status
== POOL_DISABLED
)
2074 /* Get the pool name using the id */
2075 pool_value_set_int64(vals
[0], poolid
);
2076 if (pool_value_set_name(vals
[0], "pool.sys_id") != PO_SUCCESS
)
2079 if ((pools
= pool_query_pools(conf
, &num
, vals
)) == NULL
)
2085 if (pool_get_property(conf
, pool_to_elem(conf
, pools
[0]),
2086 "pool.name", vals
[0]) != POC_STRING
||
2087 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
2089 (void) strlcpy(poolname
, (char *)string
, sizeof (poolname
));
2091 /* Get the name of the pset for the pool */
2092 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
2095 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
2098 if ((res_list
= pool_query_pool_resources(conf
, pools
[0], &num
, vals
))
2105 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
2106 res_list
[0]), "pset.sys_id", vals
[0]) != POC_INT
||
2107 pool_value_get_int64(vals
[0], &int64
) != PO_SUCCESS
)
2110 if (int64
== ZS_PSET_DEFAULT
)
2111 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
2113 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
2114 res_list
[0]), "pset.name", vals
[0]) != POC_STRING
||
2115 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
2118 (void) strlcpy(psetname
, (char *)string
, sizeof (psetname
));
2120 if (strncmp(psetname
, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2121 *cputype
= ZS_CPUTYPE_DEDICATED
;
2122 if (strncmp(psetname
, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2123 *cputype
= ZS_CPUTYPE_PSRSET_PSET
;
2125 *cputype
= ZS_CPUTYPE_POOL_PSET
;
2129 if (pv_save
!= NULL
)
2135 (void) strlcpy(pool
, poolname
, poollen
);
2136 (void) strlcpy(pset
, psetname
, psetlen
);
2139 /* Convert scheduler names to ZS_* scheduler flags */
2141 zsd_schedname2int(char *clname
, int pri
)
2145 if (strcmp(clname
, "TS") == 0) {
2146 sched
= ZS_SCHED_TS
;
2147 } else if (strcmp(clname
, "IA") == 0) {
2148 sched
= ZS_SCHED_IA
;
2149 } else if (strcmp(clname
, "FX") == 0) {
2151 sched
= ZS_SCHED_FX_60
;
2153 sched
= ZS_SCHED_FX
;
2155 } else if (strcmp(clname
, "RT") == 0) {
2156 sched
= ZS_SCHED_RT
;
2158 } else if (strcmp(clname
, "FSS") == 0) {
2159 sched
= ZS_SCHED_FSS
;
2165 zsd_get_zone_rctl_limit(char *name
)
2169 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
2170 if (getrctl(name
, NULL
, rblk
, RCTL_FIRST
)
2172 return (ZS_LIMIT_NONE
);
2174 return (rctlblk_get_value(rblk
));
2178 zsd_get_zone_rctl_usage(char *name
)
2182 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
2183 if (getrctl(name
, NULL
, rblk
, RCTL_USAGE
)
2187 return (rctlblk_get_value(rblk
));
2190 #define ZSD_NUM_RCTL_VALS 19
2193 * Fetch the limit information for a zone. This uses zone_enter() as the
2194 * getrctl(2) system call only returns rctl information for the zone of
2198 zsd_get_zone_caps(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
, uint64_t *cpu_shares
,
2199 uint64_t *cpu_cap
, uint64_t *ram_cap
, uint64_t *locked_cap
,
2200 uint64_t *vm_cap
, uint64_t *processes_cap
, uint64_t *processes
,
2201 uint64_t *lwps_cap
, uint64_t *lwps
, uint64_t *shm_cap
, uint64_t *shm
,
2202 uint64_t *shmids_cap
, uint64_t *shmids
, uint64_t *semids_cap
,
2203 uint64_t *semids
, uint64_t *msgids_cap
, uint64_t *msgids
,
2204 uint64_t *lofi_cap
, uint64_t *lofi
, uint_t
*sched
)
2206 int p
[2], pid
, tmpl_fd
, ret
;
2208 char class[PC_CLNMSZ
];
2209 uint64_t vals
[ZSD_NUM_RCTL_VALS
];
2210 zsd_system_t
*sys
= ctl
->zsctl_system
;
2214 /* Treat all caps as no cap on error */
2215 *cpu_shares
= ZS_LIMIT_NONE
;
2216 *cpu_cap
= ZS_LIMIT_NONE
;
2217 *ram_cap
= ZS_LIMIT_NONE
;
2218 *locked_cap
= ZS_LIMIT_NONE
;
2219 *vm_cap
= ZS_LIMIT_NONE
;
2221 *processes_cap
= ZS_LIMIT_NONE
;
2222 *lwps_cap
= ZS_LIMIT_NONE
;
2223 *shm_cap
= ZS_LIMIT_NONE
;
2224 *shmids_cap
= ZS_LIMIT_NONE
;
2225 *semids_cap
= ZS_LIMIT_NONE
;
2226 *msgids_cap
= ZS_LIMIT_NONE
;
2227 *lofi_cap
= ZS_LIMIT_NONE
;
2237 /* Get the ram cap first since it is a zone attr */
2238 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_PHYS_MCAP
,
2239 ram_cap
, sizeof (*ram_cap
));
2240 if (ret
< 0 || *ram_cap
== 0)
2241 *ram_cap
= ZS_LIMIT_NONE
;
2243 /* Get the zone's default scheduling class */
2244 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_SCHED_CLASS
,
2245 class, sizeof (class));
2249 *sched
= zsd_schedname2int(class, 0);
2251 /* rctl caps must be fetched from within the zone */
2255 if ((tmpl_fd
= init_template()) == -1) {
2262 (void) ct_tmpl_clear(tmpl_fd
);
2269 (void) ct_tmpl_clear(tmpl_fd
);
2270 (void) close(tmpl_fd
);
2272 if (zone
->zsz_id
!= getzoneid()) {
2273 if (zone_enter(zone
->zsz_id
) < 0) {
2279 /* Get caps for zone, and write them to zonestatd parent. */
2280 vals
[i
++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2281 vals
[i
++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2282 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2283 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-swap");
2284 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-processes");
2285 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-processes");
2286 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2287 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-lwps");
2288 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2289 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2290 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2291 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2292 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2293 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2294 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2295 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2296 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2297 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-lofi");
2299 if (write(p
[1], vals
, ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) !=
2300 ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) {
2308 if (contract_latest(&ct
) == -1)
2311 (void) ct_tmpl_clear(tmpl_fd
);
2312 (void) close(tmpl_fd
);
2314 while (waitpid(pid
, NULL
, 0) != pid
)
2317 /* Read cap from child in zone */
2318 if (read(p
[0], vals
, ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) !=
2319 ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) {
2324 *cpu_shares
= vals
[i
++];
2325 *cpu_cap
= vals
[i
++];
2326 *locked_cap
= vals
[i
++];
2327 *vm_cap
= vals
[i
++];
2328 *processes_cap
= vals
[i
++];
2329 *processes
= vals
[i
++];
2330 *lwps_cap
= vals
[i
++];
2332 *shm_cap
= vals
[i
++];
2334 *shmids_cap
= vals
[i
++];
2335 *shmids
= vals
[i
++];
2336 *semids_cap
= vals
[i
++];
2337 *semids
= vals
[i
++];
2338 *msgids_cap
= vals
[i
++];
2339 *msgids
= vals
[i
++];
2340 *lofi_cap
= vals
[i
++];
2343 /* Interpret maximum values as no cap */
2344 if (*cpu_cap
== UINT32_MAX
|| *cpu_cap
== 0)
2345 *cpu_cap
= ZS_LIMIT_NONE
;
2346 if (*processes_cap
== sys
->zss_processes_max
)
2347 *processes_cap
= ZS_LIMIT_NONE
;
2348 if (*lwps_cap
== sys
->zss_lwps_max
)
2349 *lwps_cap
= ZS_LIMIT_NONE
;
2350 if (*shm_cap
== sys
->zss_shm_max
)
2351 *shm_cap
= ZS_LIMIT_NONE
;
2352 if (*shmids_cap
== sys
->zss_shmids_max
)
2353 *shmids_cap
= ZS_LIMIT_NONE
;
2354 if (*semids_cap
== sys
->zss_semids_max
)
2355 *semids_cap
= ZS_LIMIT_NONE
;
2356 if (*msgids_cap
== sys
->zss_msgids_max
)
2357 *msgids_cap
= ZS_LIMIT_NONE
;
2358 if (*lofi_cap
== sys
->zss_lofi_max
)
2359 *lofi_cap
= ZS_LIMIT_NONE
;
2364 (void) ct_tmpl_clear(tmpl_fd
);
2365 (void) close(tmpl_fd
);
2366 (void) contract_abandon_id(ct
);
2371 /* Update the current list of running zones */
2373 zsd_refresh_zones(zsd_ctl_t
*ctl
)
2380 uint64_t cpu_shares
;
2383 uint64_t locked_cap
;
2385 uint64_t processes_cap
;
2391 uint64_t shmids_cap
;
2393 uint64_t semids_cap
;
2395 uint64_t msgids_cap
;
2400 char zonename
[ZS_ZONENAME_MAX
];
2401 char poolname
[ZS_POOLNAME_MAX
];
2402 char psetname
[ZS_PSETNAME_MAX
];
2407 /* Get the current list of running zones */
2409 old
= num
= ctl
->zsctl_zone_ncache
;
2410 (void) zone_list(ctl
->zsctl_zone_cache
, &num
);
2413 if ((cache
= (zoneid_t
*)realloc(ctl
->zsctl_zone_cache
,
2414 (num
) * sizeof (zoneid_t
))) != NULL
) {
2415 ctl
->zsctl_zone_ncache
= num
;
2416 ctl
->zsctl_zone_cache
= cache
;
2418 /* Could not allocate to get new zone list. Give up */
2423 zsd_mark_zones_start(ctl
);
2425 for (i
= 0; i
< num
; i
++) {
2427 ret
= getzonenamebyid(ctl
->zsctl_zone_cache
[i
],
2428 zonename
, sizeof (zonename
));
2432 zone
= zsd_lookup_insert_zone(ctl
, zonename
,
2433 ctl
->zsctl_zone_cache
[i
]);
2435 ret
= zone_getattr(ctl
->zsctl_zone_cache
[i
], ZONE_ATTR_FLAGS
,
2436 &flags
, sizeof (flags
));
2440 if (flags
& ZF_NET_EXCL
)
2441 iptype
= ZS_IPTYPE_EXCLUSIVE
;
2443 iptype
= ZS_IPTYPE_SHARED
;
2445 zsd_get_zone_pool_pset(ctl
, zone
, poolname
, sizeof (poolname
),
2446 psetname
, sizeof (psetname
), &cputype
);
2448 if (zsd_get_zone_caps(ctl
, zone
, &cpu_shares
, &cpu_cap
,
2449 &ram_cap
, &locked_cap
, &vm_cap
, &processes_cap
, &processes
,
2450 &lwps_cap
, &lwps
, &shm_cap
, &shm
, &shmids_cap
, &shmids
,
2451 &semids_cap
, &semids
, &msgids_cap
, &msgids
, &lofi_cap
,
2452 &lofi
, &sched
) != 0)
2455 zsd_mark_zone_found(ctl
, zone
, cpu_shares
, cpu_cap
, ram_cap
,
2456 locked_cap
, vm_cap
, processes_cap
, processes
, lwps_cap
,
2457 lwps
, shm_cap
, shm
, shmids_cap
, shmids
, semids_cap
,
2458 semids
, msgids_cap
, msgids
, lofi_cap
, lofi
, poolname
,
2459 psetname
, sched
, cputype
, iptype
);
2463 /* Fetch the details of a process from its psinfo_t */
2465 zsd_get_proc_info(zsd_ctl_t
*ctl
, psinfo_t
*psinfo
, psetid_t
*psetid
,
2466 psetid_t
*prev_psetid
, zoneid_t
*zoneid
, zoneid_t
*prev_zoneid
,
2467 timestruc_t
*delta
, uint_t
*sched
)
2472 /* Get cached data for proc */
2473 proc
= &(ctl
->zsctl_proc_array
[psinfo
->pr_pid
]);
2474 *psetid
= psinfo
->pr_lwp
.pr_bindpset
;
2476 if (proc
->zspr_psetid
== ZS_PSET_ERROR
)
2477 *prev_psetid
= *psetid
;
2479 *prev_psetid
= proc
->zspr_psetid
;
2481 *zoneid
= psinfo
->pr_zoneid
;
2482 if (proc
->zspr_zoneid
== -1)
2483 *prev_zoneid
= *zoneid
;
2485 *prev_zoneid
= proc
->zspr_zoneid
;
2487 TIMESTRUC_DELTA(d
, psinfo
->pr_time
, proc
->zspr_usage
);
2490 *sched
= zsd_schedname2int(psinfo
->pr_lwp
.pr_clname
,
2491 psinfo
->pr_lwp
.pr_pri
);
2493 /* Update cached data for proc */
2494 proc
->zspr_psetid
= psinfo
->pr_lwp
.pr_bindpset
;
2495 proc
->zspr_zoneid
= psinfo
->pr_zoneid
;
2496 proc
->zspr_sched
= *sched
;
2497 proc
->zspr_usage
.tv_sec
= psinfo
->pr_time
.tv_sec
;
2498 proc
->zspr_usage
.tv_nsec
= psinfo
->pr_time
.tv_nsec
;
2499 proc
->zspr_ppid
= psinfo
->pr_ppid
;
2503 * Reset the known cpu usage of a process. This is done after a process
2504 * exits so that if the pid is recycled, data from its previous life is
2508 zsd_flush_proc_info(zsd_proc_t
*proc
)
2510 proc
->zspr_usage
.tv_sec
= 0;
2511 proc
->zspr_usage
.tv_nsec
= 0;
2515 * Open the current extended accounting file. On initialization, open the
2516 * file as the current file to be used. Otherwise, open the file as the
2517 * next file to use of the current file reaches EOF.
2520 zsd_open_exacct(zsd_ctl_t
*ctl
, boolean_t init
)
2522 int ret
, oret
, state
, trys
= 0, flags
;
2525 struct stat64
*stat
;
2526 char path
[MAXPATHLEN
];
2529 * The accounting file is first opened at the tail. Following
2530 * opens to new accounting files are opened at the head.
2532 if (init
== B_TRUE
) {
2533 flags
= EO_NO_VALID_HDR
| EO_TAIL
;
2534 fd
= &ctl
->zsctl_proc_fd
;
2535 eaf
= &ctl
->zsctl_proc_eaf
;
2536 stat
= &ctl
->zsctl_proc_stat
;
2537 open
= &ctl
->zsctl_proc_open
;
2539 flags
= EO_NO_VALID_HDR
| EO_HEAD
;
2540 fd
= &ctl
->zsctl_proc_fd_next
;
2541 eaf
= &ctl
->zsctl_proc_eaf_next
;
2542 stat
= &ctl
->zsctl_proc_stat_next
;
2543 open
= &ctl
->zsctl_proc_open_next
;
2549 /* open accounting files for cpu consumption */
2550 ret
= acctctl(AC_STATE_GET
| AC_PROC
, &state
, sizeof (state
));
2552 zsd_warn(gettext("Unable to get process accounting state"));
2555 if (state
!= AC_ON
) {
2558 "Unable to enable process accounting"));
2561 (void) zsd_enable_cpu_stats();
2566 ret
= acctctl(AC_FILE_GET
| AC_PROC
, path
, sizeof (path
));
2568 zsd_warn(gettext("Unable to get process accounting file"));
2572 if ((*fd
= open64(path
, O_RDONLY
, 0)) >= 0 &&
2573 (oret
= ea_fdopen(eaf
, *fd
, NULL
, flags
, O_RDONLY
)) == 0)
2574 ret
= fstat64(*fd
, stat
);
2576 if (*fd
< 0 || oret
< 0 || ret
< 0) {
2580 * It is possible the accounting file is momentarily unavailable
2581 * because it is being rolled. Try for up to half a second.
2583 * If failure to open accounting file persists, give up.
2586 (void) ea_close(eaf
);
2591 "Unable to open process accounting file"));
2594 /* wait one millisecond */
2596 ts
.tv_nsec
= NANOSEC
/ 1000;
2597 (void) nanosleep(&ts
, NULL
);
2611 * Walk /proc and charge each process to its zone and processor set.
2612 * Then read exacct data for exited processes, and charge them as well.
2615 zsd_refresh_procs(zsd_ctl_t
*ctl
, boolean_t init
)
2618 struct dirent
*dent
, *dresult
;
2621 zsd_proc_t
*proc
, *pproc
, *tmp
, *next
;
2622 list_t pplist
, plist
;
2623 zsd_zone_t
*zone
, *prev_zone
;
2624 zsd_pset_t
*pset
, *prev_pset
;
2625 psetid_t psetid
, prev_psetid
;
2626 zoneid_t zoneid
, prev_zoneid
;
2627 zsd_pset_usage_t
*usage
, *prev_usage
;
2628 char path
[MAXPATHLEN
];
2631 ea_object_t pobject
;
2632 boolean_t hrtime_expired
= B_FALSE
;
2633 struct timeval interval_end
;
2635 timestruc_t delta
, d1
, d2
;
2639 * Get the current accounting file. The current accounting file
2640 * may be different than the file in use, as the accounting file
2641 * may have been rolled, or manually changed by an admin.
2643 ret
= zsd_open_exacct(ctl
, init
);
2645 zsd_warn(gettext("Unable to track process accounting"));
2650 * Mark the current time as the interval end time. Don't track
2651 * processes that exit after this time.
2653 (void) gettimeofday(&interval_end
, NULL
);
2655 dir
= opendir("/proc");
2657 zsd_warn(gettext("Unable to open /proc"));
2661 dent
= ctl
->zsctl_procfs_dent
;
2663 (void) memset(dent
, 0, ctl
->zsctl_procfs_dent_size
);
2665 /* Walk all processes and compute each zone's usage on each pset. */
2666 while (readdir_r(dir
, dent
, &dresult
) == 0) {
2668 if (strcmp(dent
->d_name
, ".") == 0 ||
2669 strcmp(dent
->d_name
, "..") == 0)
2672 (void) snprintf(path
, sizeof (path
), "/proc/%s/psinfo",
2675 fd
= open(path
, O_RDONLY
);
2679 if (read(fd
, &psinfo
, sizeof (psinfo
)) != sizeof (psinfo
)) {
2685 zsd_get_proc_info(ctl
, &psinfo
, &psetid
, &prev_psetid
,
2686 &zoneid
, &prev_zoneid
, &delta
, &sched
);
2688 d1
.tv_sec
= delta
.tv_sec
/ 2;
2689 d1
.tv_nsec
= delta
.tv_nsec
/ 2;
2690 d2
.tv_sec
= (delta
.tv_sec
/ 2) + (delta
.tv_sec
% 2);
2691 d2
.tv_nsec
= (delta
.tv_nsec
/ 2) + (delta
.tv_nsec
% 2);
2693 /* Get the zone and pset this process is running in */
2694 zone
= zsd_lookup_zone_byid(ctl
, zoneid
);
2697 pset
= zsd_lookup_pset_byid(ctl
, psetid
);
2700 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
2705 * Get the usage of the previous zone and pset if they were
2708 if (zoneid
!= prev_zoneid
)
2709 prev_zone
= zsd_lookup_zone_byid(ctl
, prev_zoneid
);
2713 if (psetid
!= prev_psetid
)
2714 prev_pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2719 if (prev_zone
!= NULL
|| prev_pset
!= NULL
) {
2720 if (prev_zone
== NULL
)
2722 if (prev_pset
== NULL
)
2725 prev_usage
= zsd_lookup_insert_usage(ctl
, prev_pset
,
2729 /* Update the usage with the processes info */
2730 if (prev_usage
== NULL
) {
2731 zsd_mark_pset_usage_found(usage
, sched
);
2733 zsd_mark_pset_usage_found(usage
, sched
);
2734 zsd_mark_pset_usage_found(prev_usage
, sched
);
2738 * First time around is just to get a starting point. All
2739 * usages will be zero.
2744 if (prev_usage
== NULL
) {
2745 zsd_add_usage(ctl
, usage
, &delta
);
2747 zsd_add_usage(ctl
, usage
, &d1
);
2748 zsd_add_usage(ctl
, prev_usage
, &d2
);
2751 (void) closedir(dir
);
2754 * No need to collect exited proc data on initialization. Just
2755 * caching the usage of the known processes to get a zero starting
2762 * Add accounting records to account for processes which have
2765 list_create(&plist
, sizeof (zsd_proc_t
),
2766 offsetof(zsd_proc_t
, zspr_next
));
2767 list_create(&pplist
, sizeof (zsd_proc_t
),
2768 offsetof(zsd_proc_t
, zspr_next
));
2773 timestruc_t user
, sys
, proc_usage
;
2777 bzero(&object
, sizeof (object
));
2782 ret
= ea_get_object(&ctl
->zsctl_proc_eaf
, &object
);
2783 if (ret
== EO_ERROR
) {
2784 if (ea_error() == EXR_EOF
) {
2786 struct stat64
*stat
;
2787 struct stat64
*stat_next
;
2790 * See if the next accounting file is the
2791 * same as the current accounting file.
2793 stat
= &(ctl
->zsctl_proc_stat
);
2794 stat_next
= &(ctl
->zsctl_proc_stat_next
);
2795 if (stat
->st_ino
== stat_next
->st_ino
&&
2796 stat
->st_dev
== stat_next
->st_dev
) {
2798 * End of current accounting file is
2799 * reached, so finished. Clear EOF
2800 * bit for next time around.
2802 ea_clear(&ctl
->zsctl_proc_eaf
);
2806 * Accounting file has changed. Move
2807 * to current accounting file.
2809 (void) ea_close(&ctl
->zsctl_proc_eaf
);
2811 ctl
->zsctl_proc_fd
=
2812 ctl
->zsctl_proc_fd_next
;
2813 ctl
->zsctl_proc_eaf
=
2814 ctl
->zsctl_proc_eaf_next
;
2815 ctl
->zsctl_proc_stat
=
2816 ctl
->zsctl_proc_stat_next
;
2818 ctl
->zsctl_proc_fd_next
= -1;
2819 ctl
->zsctl_proc_open_next
= 0;
2824 * Other accounting error. Give up on
2830 /* Skip if not a process group */
2831 if ((object
.eo_catalog
& EXT_TYPE_MASK
) != EXT_GROUP
||
2832 (object
.eo_catalog
& EXD_DATA_MASK
) != EXD_GROUP_PROC
) {
2833 (void) ea_free_item(&object
, EUP_ALLOC
);
2837 /* The process group entry should be complete */
2838 while (numfound
< 9) {
2839 bzero(&pobject
, sizeof (pobject
));
2840 ret
= ea_get_object(&ctl
->zsctl_proc_eaf
,
2843 (void) ea_free_item(&object
, EUP_ALLOC
);
2845 "unable to get process accounting data");
2848 /* Next entries should be process data */
2849 if ((pobject
.eo_catalog
& EXT_TYPE_MASK
) ==
2851 (void) ea_free_item(&object
, EUP_ALLOC
);
2852 (void) ea_free_item(&pobject
, EUP_ALLOC
);
2854 "process data of wrong type");
2857 switch (pobject
.eo_catalog
& EXD_DATA_MASK
) {
2859 pid
= pobject
.eo_item
.ei_uint32
;
2860 proc
= &(ctl
->zsctl_proc_array
[pid
]);
2862 * This process should not be currently in
2863 * the list of processes to process.
2865 assert(!list_link_active(&proc
->zspr_next
));
2868 case EXD_PROC_ANCPID
:
2869 ppid
= pobject
.eo_item
.ei_uint32
;
2870 pproc
= &(ctl
->zsctl_proc_array
[ppid
]);
2873 case EXD_PROC_ZONENAME
:
2874 zone
= zsd_lookup_zone(ctl
,
2875 pobject
.eo_item
.ei_string
, -1);
2878 case EXD_PROC_CPU_USER_SEC
:
2880 pobject
.eo_item
.ei_uint64
;
2883 case EXD_PROC_CPU_USER_NSEC
:
2885 pobject
.eo_item
.ei_uint64
;
2888 case EXD_PROC_CPU_SYS_SEC
:
2890 pobject
.eo_item
.ei_uint64
;
2893 case EXD_PROC_CPU_SYS_NSEC
:
2895 pobject
.eo_item
.ei_uint64
;
2898 case EXD_PROC_FINISH_SEC
:
2900 pobject
.eo_item
.ei_uint64
;
2903 case EXD_PROC_FINISH_NSEC
:
2905 pobject
.eo_item
.ei_uint64
;
2909 (void) ea_free_item(&pobject
, EUP_ALLOC
);
2911 (void) ea_free_item(&object
, EUP_ALLOC
);
2912 if (numfound
!= 9) {
2914 "Malformed process accounting entry found"));
2918 if (finish
.tv_sec
> interval_end
.tv_sec
||
2919 (finish
.tv_sec
== interval_end
.tv_sec
&&
2920 finish
.tv_nsec
> (interval_end
.tv_usec
* 1000)))
2921 hrtime_expired
= B_TRUE
;
2924 * Try to identify the zone and pset to which this
2925 * exited process belongs.
2930 /* Save proc info */
2931 proc
->zspr_ppid
= ppid
;
2932 proc
->zspr_zoneid
= zone
->zsz_id
;
2934 prev_psetid
= ZS_PSET_ERROR
;
2938 * The following tries to deduce the processes pset.
2940 * First choose pset and sched using cached value from the
2941 * most recent time the process has been seen.
2943 * pset and sched can change across zone_enter, so make sure
2944 * most recent sighting of this process was in the same
2945 * zone before using most recent known value.
2947 * If there is no known value, use value of processes
2948 * parent. If parent is unknown, walk parents until a known
2951 * If no parent in the zone is found, use the zone's default
2952 * pset and scheduling class.
2954 if (proc
->zspr_psetid
!= ZS_PSET_ERROR
) {
2955 prev_psetid
= proc
->zspr_psetid
;
2956 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2957 sched
= proc
->zspr_sched
;
2958 } else if (pproc
->zspr_zoneid
== zone
->zsz_id
&&
2959 pproc
->zspr_psetid
!= ZS_PSET_ERROR
) {
2960 prev_psetid
= pproc
->zspr_psetid
;
2961 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2962 sched
= pproc
->zspr_sched
;
2967 * Process or processes parent has never been seen.
2968 * Save to deduce a known parent later.
2971 TIMESTRUC_ADD_TIMESTRUC(proc_usage
, user
);
2972 TIMESTRUC_DELTA(delta
, proc_usage
,
2974 proc
->zspr_usage
= delta
;
2975 list_insert_tail(&plist
, proc
);
2979 /* Add the zone's usage to the pset */
2980 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
2984 zsd_mark_pset_usage_found(usage
, sched
);
2986 /* compute the usage to add for the exited proc */
2988 TIMESTRUC_ADD_TIMESTRUC(proc_usage
, user
);
2989 TIMESTRUC_DELTA(delta
, proc_usage
,
2992 zsd_add_usage(ctl
, usage
, &delta
);
2994 zsd_flush_proc_info(proc
);
2996 if (hrtime_expired
== B_TRUE
)
3000 * close next accounting file.
3002 if (ctl
->zsctl_proc_open_next
) {
3004 &ctl
->zsctl_proc_eaf_next
);
3005 ctl
->zsctl_proc_open_next
= 0;
3006 ctl
->zsctl_proc_fd_next
= -1;
3009 /* For the remaining processes, use pset and sched of a known parent */
3010 proc
= list_head(&plist
);
3011 while (proc
!= NULL
) {
3014 if (next
->zspr_ppid
== 0 || next
->zspr_ppid
== -1) {
3016 * Kernel process, or parent is unknown, skip
3017 * process, remove from process list.
3020 proc
= list_next(&plist
, proc
);
3021 list_link_init(&tmp
->zspr_next
);
3024 pproc
= &(ctl
->zsctl_proc_array
[next
->zspr_ppid
]);
3025 if (pproc
->zspr_zoneid
!= proc
->zspr_zoneid
) {
3027 * Parent in different zone. Save process and
3028 * use zone's default pset and sched below
3031 proc
= list_next(&plist
, proc
);
3032 list_remove(&plist
, tmp
);
3033 list_insert_tail(&pplist
, tmp
);
3036 /* Parent has unknown pset, Search parent's parent */
3037 if (pproc
->zspr_psetid
== ZS_PSET_ERROR
) {
3041 /* Found parent with known pset. Use its info */
3042 proc
->zspr_psetid
= pproc
->zspr_psetid
;
3043 proc
->zspr_sched
= pproc
->zspr_sched
;
3044 next
->zspr_psetid
= pproc
->zspr_psetid
;
3045 next
->zspr_sched
= pproc
->zspr_sched
;
3046 zone
= zsd_lookup_zone_byid(ctl
,
3050 proc
= list_next(&plist
, proc
);
3051 list_remove(&plist
, tmp
);
3052 list_link_init(&tmp
->zspr_next
);
3055 pset
= zsd_lookup_pset_byid(ctl
,
3059 proc
= list_next(&plist
, proc
);
3060 list_remove(&plist
, tmp
);
3061 list_link_init(&tmp
->zspr_next
);
3064 /* Add the zone's usage to the pset */
3065 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
3066 if (usage
== NULL
) {
3068 proc
= list_next(&plist
, proc
);
3069 list_remove(&plist
, tmp
);
3070 list_link_init(&tmp
->zspr_next
);
3073 zsd_mark_pset_usage_found(usage
, proc
->zspr_sched
);
3074 zsd_add_usage(ctl
, usage
, &proc
->zspr_usage
);
3075 zsd_flush_proc_info(proc
);
3077 proc
= list_next(&plist
, proc
);
3078 list_remove(&plist
, tmp
);
3079 list_link_init(&tmp
->zspr_next
);
3084 * Process has never been seen. Using zone info to
3085 * determine pset and scheduling class.
3087 proc
= list_head(&pplist
);
3088 while (proc
!= NULL
) {
3090 zone
= zsd_lookup_zone_byid(ctl
, proc
->zspr_zoneid
);
3093 if (zone
->zsz_psetid
!= ZS_PSET_ERROR
&&
3094 zone
->zsz_psetid
!= ZS_PSET_MULTI
) {
3095 prev_psetid
= zone
->zsz_psetid
;
3096 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
3098 pset
= zsd_lookup_pset(ctl
, zone
->zsz_pset
, -1);
3100 prev_psetid
= pset
->zsp_id
;
3105 sched
= zone
->zsz_scheds
;
3107 * Ignore FX high scheduling class if it is not the
3108 * only scheduling class in the zone.
3110 if (sched
!= ZS_SCHED_FX_60
)
3111 sched
&= (~ZS_SCHED_FX_60
);
3113 * If more than one scheduling class has been found
3114 * in the zone, use zone's default scheduling class for
3117 if ((sched
& (sched
- 1)) != 0)
3118 sched
= zone
->zsz_default_sched
;
3120 /* Add the zone's usage to the pset */
3121 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
3125 zsd_mark_pset_usage_found(usage
, sched
);
3126 zsd_add_usage(ctl
, usage
, &proc
->zspr_usage
);
3129 proc
= list_next(&pplist
, proc
);
3130 zsd_flush_proc_info(tmp
);
3131 list_link_init(&tmp
->zspr_next
);
3136 * Close the next accounting file if we have not transitioned to it
3139 if (ctl
->zsctl_proc_open_next
) {
3140 (void) ea_close(&ctl
->zsctl_proc_eaf_next
);
3141 ctl
->zsctl_proc_open_next
= 0;
3142 ctl
->zsctl_proc_fd_next
= -1;
3147 * getvmusage(2) uses size_t's in the passwd data structure, which differ
3148 * in size for 32bit and 64 bit kernels. Since this is a contracted interface,
3149 * and zonestatd does not necessarily match the kernel's bitness, marshal
3150 * results appropriately.
3153 zsd_getvmusage(zsd_ctl_t
*ctl
, uint_t flags
, time_t age
, zsd_vmusage64_t
*buf
,
3156 zsd_vmusage32_t
*vmu32
;
3157 zsd_vmusage64_t
*vmu64
;
3162 if (ctl
->zsctl_kern_bits
== 32) {
3164 ret
= syscall(SYS_rusagesys
, _RUSAGESYS_GETVMUSAGE
,
3165 flags
, age
, (uintptr_t)buf
, (uintptr_t)&nres32
);
3167 if (ret
== 0 && buf
!= NULL
) {
3169 * An array of vmusage32_t's has been returned.
3170 * Convert it to an array of vmusage64_t's.
3172 vmu32
= (zsd_vmusage32_t
*)buf
;
3173 vmu64
= (zsd_vmusage64_t
*)buf
;
3174 for (i
= nres32
- 1; i
>= 0; i
--) {
3176 vmu64
[i
].vmu_zoneid
= vmu32
[i
].vmu_zoneid
;
3177 vmu64
[i
].vmu_type
= vmu32
[i
].vmu_type
;
3178 vmu64
[i
].vmu_type
= vmu32
[i
].vmu_type
;
3179 vmu64
[i
].vmu_rss_all
= vmu32
[i
].vmu_rss_all
;
3180 vmu64
[i
].vmu_rss_private
=
3181 vmu32
[i
].vmu_rss_private
;
3182 vmu64
[i
].vmu_rss_shared
=
3183 vmu32
[i
].vmu_rss_shared
;
3184 vmu64
[i
].vmu_swap_all
= vmu32
[i
].vmu_swap_all
;
3185 vmu64
[i
].vmu_swap_private
=
3186 vmu32
[i
].vmu_swap_private
;
3187 vmu64
[i
].vmu_swap_shared
=
3188 vmu32
[i
].vmu_swap_shared
;
3194 * kernel is 64 bit, so use 64 bit structures as zonestat
3197 return (syscall(SYS_rusagesys
, _RUSAGESYS_GETVMUSAGE
,
3198 flags
, age
, (uintptr_t)buf
, (uintptr_t)nres
));
3204 * Update the current physical, virtual, and locked memory usage of the
3208 zsd_refresh_memory(zsd_ctl_t
*ctl
, boolean_t init
)
3211 uint64_t phys_total
;
3213 uint64_t phys_zones
;
3214 uint64_t phys_zones_overcount
;
3215 uint64_t phys_zones_extra
;
3216 uint64_t phys_zones_credit
;
3221 uint64_t disk_swap_total
;
3222 uint64_t disk_swap_used
; /* disk swap with contents */
3226 uint64_t arc_size
= 0;
3227 struct anoninfo ani
;
3229 int num_swap_devices
;
3230 struct swaptable
*swt
;
3231 struct swapent
*swent
;
3235 zsd_vmusage64_t
*vmusage
;
3236 uint64_t num_vmusage
;
3245 char kstat_name
[KSTAT_STRLEN
];
3252 sys
= ctl
->zsctl_system
;
3254 /* interrogate swap devices to find the amount of disk swap */
3256 num_swap_devices
= swapctl(SC_GETNSWP
, NULL
);
3258 if (num_swap_devices
== 0) {
3259 sys
->zss_swap_total
= disk_swap_total
= 0;
3260 sys
->zss_swap_used
= disk_swap_used
= 0;
3262 goto disk_swap_done
;
3264 /* see if swap table needs to be larger */
3265 if (num_swap_devices
> ctl
->zsctl_swap_cache_num
) {
3266 swt_size
= sizeof (int) +
3267 (num_swap_devices
* sizeof (struct swapent
)) +
3268 (num_swap_devices
* MAXPATHLEN
);
3269 free(ctl
->zsctl_swap_cache
);
3271 swt
= (struct swaptable
*)malloc(swt_size
);
3274 * Could not allocate to get list of swap devices.
3275 * Just use data from the most recent read, which will
3276 * be zero if this is the first read.
3278 zsd_warn(gettext("Unable to allocate to determine "
3280 disk_swap_total
= sys
->zss_swap_total
;
3281 disk_swap_used
= sys
->zss_swap_used
;
3282 goto disk_swap_done
;
3284 swent
= swt
->swt_ent
;
3285 path
= (char *)swt
+ (sizeof (int) +
3286 num_swap_devices
* sizeof (swapent_t
));
3287 for (i
= 0; i
< num_swap_devices
; i
++, swent
++) {
3288 swent
->ste_path
= path
;
3291 swt
->swt_n
= num_swap_devices
;
3292 ctl
->zsctl_swap_cache
= swt
;
3293 ctl
->zsctl_swap_cache_size
= swt_size
;
3294 ctl
->zsctl_swap_cache_num
= num_swap_devices
;
3296 num_swap_devices
= swapctl(SC_LIST
, ctl
->zsctl_swap_cache
);
3297 if (num_swap_devices
< 0) {
3298 /* More swap devices have arrived */
3299 if (errno
== ENOMEM
)
3300 goto disk_swap_again
;
3302 zsd_warn(gettext("Unable to determine disk swap devices"));
3303 /* Unexpected error. Use existing data */
3304 disk_swap_total
= sys
->zss_swap_total
;
3305 disk_swap_used
= sys
->zss_swap_used
;
3306 goto disk_swap_done
;
3309 /* add up the disk swap */
3310 disk_swap_total
= 0;
3312 swent
= ctl
->zsctl_swap_cache
->swt_ent
;
3313 for (i
= 0; i
< num_swap_devices
; i
++, swent
++) {
3314 disk_swap_total
+= swent
->ste_pages
;
3315 disk_swap_used
+= (swent
->ste_pages
- swent
->ste_free
);
3317 disk_swap_total
*= ctl
->zsctl_pagesize
;
3318 disk_swap_used
*= ctl
->zsctl_pagesize
;
3320 sys
->zss_swap_total
= disk_swap_total
;
3321 sys
->zss_swap_used
= disk_swap_used
;
3325 /* get system pages kstat */
3327 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "unix", 0, "system_pages");
3329 zsd_warn(gettext("Unable to lookup system pages kstat"));
3331 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3334 zsd_warn(gettext("Unable to read system pages kstat"));
3337 knp
= kstat_data_lookup(kstat
, "physmem");
3339 zsd_warn(gettext("Unable to read physmem"));
3341 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3342 physmem
= knp
->value
.ui64
;
3343 else if (knp
->data_type
== KSTAT_DATA_UINT32
)
3344 physmem
= knp
->value
.ui32
;
3348 knp
= kstat_data_lookup(kstat
, "pp_kernel");
3350 zsd_warn(gettext("Unable to read pp_kernel"));
3352 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3353 pp_kernel
= knp
->value
.ui64
;
3354 else if (knp
->data_type
== KSTAT_DATA_UINT32
)
3355 pp_kernel
= knp
->value
.ui32
;
3360 physmem
*= ctl
->zsctl_pagesize
;
3361 pp_kernel
*= ctl
->zsctl_pagesize
;
3363 /* get the zfs arc size if available */
3366 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "zfs", 0, "arcstats");
3368 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3370 knp
= kstat_data_lookup(kstat
, "size");
3372 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3373 arc_size
= knp
->value
.ui64
;
3376 /* Try to get swap information */
3377 if (swapctl(SC_AINFO
, &ani
) < 0) {
3378 zsd_warn(gettext("Unable to get swap info"));
3383 /* getvmusage to get physical memory usage */
3384 vmusage
= ctl
->zsctl_vmusage_cache
;
3385 num_vmusage
= ctl
->zsctl_vmusage_cache_num
;
3387 ret
= zsd_getvmusage(ctl
, VMUSAGE_SYSTEM
| VMUSAGE_ALL_ZONES
, 0,
3388 vmusage
, &num_vmusage
);
3391 /* Unexpected error. Use existing data */
3392 if (errno
!= EOVERFLOW
) {
3394 "Unable to read physical memory usage"));
3395 phys_zones
= sys
->zss_ram_zones
;
3399 /* vmusage results cache too small */
3400 if (num_vmusage
> ctl
->zsctl_vmusage_cache_num
) {
3402 size_t size
= sizeof (zsd_vmusage64_t
) * num_vmusage
;
3404 free(ctl
->zsctl_vmusage_cache
);
3405 vmusage
= (zsd_vmusage64_t
*)malloc(size
);
3406 if (vmusage
== NULL
) {
3407 zsd_warn(gettext("Unable to alloc to determine "
3408 "physical memory usage"));
3409 phys_zones
= sys
->zss_ram_zones
;
3412 ctl
->zsctl_vmusage_cache
= vmusage
;
3413 ctl
->zsctl_vmusage_cache_num
= num_vmusage
;
3417 phys_zones_overcount
= 0;
3419 for (i
= 0; i
< num_vmusage
; i
++) {
3420 switch (vmusage
[i
].vmu_type
) {
3421 case VMUSAGE_SYSTEM
:
3422 /* total pages backing user process mappings */
3423 phys_zones
= sys
->zss_ram_zones
=
3424 vmusage
[i
].vmu_rss_all
;
3428 phys_zones_overcount
+= vmusage
[i
].vmu_rss_all
;
3429 zone
= zsd_lookup_zone_byid(ctl
, vmusage
[i
].vmu_id
);
3431 zone
->zsz_usage_ram
= vmusage
[i
].vmu_rss_all
;
3438 * Figure how much memory was double counted due to text sharing
3439 * between zones. Credit this back so that the sum of the zones
3440 * equals the total zone ram usage;
3442 phys_zones_extra
= phys_zones_overcount
- phys_zones
;
3443 phys_zones_credit
= phys_zones_extra
/ vmu_nzones
;
3447 /* walk the zones to get swap and locked kstats. Fetch ram cap. */
3448 sys
->zss_locked_zones
= 0;
3449 sys
->zss_vm_zones
= 0;
3450 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
3451 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
3453 /* If zone halted during interval, show memory usage as none */
3454 if (zone
->zsz_active
== B_FALSE
||
3455 zone
->zsz_deleted
== B_TRUE
) {
3456 zone
->zsz_usage_ram
= 0;
3457 zone
->zsz_usage_vm
= 0;
3458 zone
->zsz_usage_locked
= 0;
3462 if (phys_zones_credit
> 0) {
3463 if (zone
->zsz_usage_ram
> phys_zones_credit
) {
3464 zone
->zsz_usage_ram
-= phys_zones_credit
;
3468 * Get zone's swap usage. Since zone could have halted,
3469 * treats as zero if cannot read
3471 zone
->zsz_usage_vm
= 0;
3472 (void) snprintf(kstat_name
, sizeof (kstat_name
),
3473 "swapresv_zone_%d", zone
->zsz_id
);
3475 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "caps",
3476 zone
->zsz_id
, kstat_name
);
3478 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3480 knp
= kstat_data_lookup(kstat
, "usage");
3482 knp
->data_type
== KSTAT_DATA_UINT64
) {
3483 zone
->zsz_usage_vm
= knp
->value
.ui64
;
3484 sys
->zss_vm_zones
+= knp
->value
.ui64
;
3488 * Get zone's locked usage. Since zone could have halted,
3489 * treats as zero if cannot read
3491 zone
->zsz_usage_locked
= 0;
3492 (void) snprintf(kstat_name
, sizeof (kstat_name
),
3493 "lockedmem_zone_%d", zone
->zsz_id
);
3495 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "caps",
3496 zone
->zsz_id
, kstat_name
);
3498 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3500 knp
= kstat_data_lookup(kstat
, "usage");
3502 knp
->data_type
== KSTAT_DATA_UINT64
) {
3503 zone
->zsz_usage_locked
= knp
->value
.ui64
;
3505 * Since locked memory accounting for zones
3506 * can double count ddi locked memory, cap each
3507 * zone's locked usage at its ram usage.
3509 if (zone
->zsz_usage_locked
>
3510 zone
->zsz_usage_ram
)
3511 zone
->zsz_usage_locked
=
3512 zone
->zsz_usage_ram
;
3513 sys
->zss_locked_zones
+=
3514 zone
->zsz_usage_locked
;
3520 sysconf(_SC_PHYS_PAGES
) * ctl
->zsctl_pagesize
;
3522 phys_used
= (sysconf(_SC_PHYS_PAGES
) - sysconf(_SC_AVPHYS_PAGES
))
3523 * ctl
->zsctl_pagesize
;
3525 /* Compute remaining statistics */
3526 sys
->zss_ram_total
= phys_total
;
3527 sys
->zss_ram_zones
= phys_zones
;
3528 sys
->zss_ram_kern
= phys_used
- phys_zones
- arc_size
;
3531 * The total for kernel locked memory should include
3532 * segkp locked pages, but oh well. The arc size is subtracted,
3533 * as that physical memory is reclaimable.
3535 sys
->zss_locked_kern
= pp_kernel
- arc_size
;
3536 /* Add memory used by kernel startup and obp to kernel locked */
3537 if ((phys_total
- physmem
) > 0)
3538 sys
->zss_locked_kern
+= phys_total
- physmem
;
3541 * Add in the portion of (RAM+DISK) that is not available as swap,
3542 * and consider it swap used by the kernel.
3544 sys
->zss_vm_total
= phys_total
+ disk_swap_total
;
3545 vm_free
= (ani
.ani_max
- ani
.ani_resv
) * ctl
->zsctl_pagesize
;
3546 vm_used
= sys
->zss_vm_total
- vm_free
;
3547 sys
->zss_vm_kern
= vm_used
- sys
->zss_vm_zones
- arc_size
;
3551 * Charge each cpu's usage to its processor sets. Also add the cpu's total
3552 * time to each zone using the processor set. This tracks the maximum
3553 * amount of cpu time that a zone could have used.
3556 zsd_refresh_cpu_stats(zsd_ctl_t
*ctl
, boolean_t init
)
3560 zsd_pset_usage_t
*usage
;
3562 zsd_cpu_t
*cpu_next
;
3568 /* Update the per-cpu kstat data */
3569 cpu_next
= list_head(&ctl
->zsctl_cpus
);
3570 while (cpu_next
!= NULL
) {
3572 cpu_next
= list_next(&ctl
->zsctl_cpus
, cpu
);
3573 zsd_update_cpu_stats(ctl
, cpu
);
3575 /* Update the elapsed real time */
3576 hrtime
= gethrtime();
3578 /* first time around, store hrtime for future comparision */
3579 ctl
->zsctl_hrtime
= hrtime
;
3580 ctl
->zsctl_hrtime_prev
= hrtime
;
3583 /* Compute increase in hrtime since the most recent read */
3584 ctl
->zsctl_hrtime_prev
= ctl
->zsctl_hrtime
;
3585 ctl
->zsctl_hrtime
= hrtime
;
3586 if ((hrtime
= hrtime
- ctl
->zsctl_hrtime_prev
) > 0)
3587 TIMESTRUC_ADD_NANOSEC(ctl
->zsctl_hrtime_total
, hrtime
);
3590 /* On initialization, all psets have zero time */
3594 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
3595 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
3597 if (pset
->zsp_active
== B_FALSE
) {
3598 zsd_warn(gettext("Internal error,inactive pset found"));
3602 /* sum total used time for pset */
3605 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_intr
);
3606 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_kern
);
3607 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_user
);
3608 /* kernel time in pset is total time minus zone time */
3609 TIMESTRUC_DELTA(pset
->zsp_usage_kern
, ts
,
3610 pset
->zsp_usage_zones
);
3611 if (pset
->zsp_usage_kern
.tv_sec
< 0 ||
3612 pset
->zsp_usage_kern
.tv_nsec
< 0) {
3613 pset
->zsp_usage_kern
.tv_sec
= 0;
3614 pset
->zsp_usage_kern
.tv_nsec
= 0;
3616 /* Total pset elapsed time is used time plus idle time */
3617 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_idle
);
3619 TIMESTRUC_DELTA(delta
, ts
, pset
->zsp_total_time
);
3621 for (usage
= list_head(&pset
->zsp_usage_list
); usage
!= NULL
;
3622 usage
= list_next(&pset
->zsp_usage_list
, usage
)) {
3624 zone
= usage
->zsu_zone
;
3625 if (usage
->zsu_cpu_shares
!= ZS_LIMIT_NONE
&&
3626 usage
->zsu_cpu_shares
!= ZS_SHARES_UNLIMITED
&&
3627 usage
->zsu_cpu_shares
!= 0) {
3629 * Figure out how many nanoseconds of share time
3630 * to give to the zone
3632 hrtime
= delta
.tv_sec
;
3634 hrtime
+= delta
.tv_nsec
;
3635 hrtime
*= usage
->zsu_cpu_shares
;
3636 hrtime
/= pset
->zsp_cpu_shares
;
3637 TIMESTRUC_ADD_NANOSEC(zone
->zsz_share_time
,
3640 /* Add pset time to each zone using pset */
3641 TIMESTRUC_ADD_TIMESTRUC(zone
->zsz_pset_time
, delta
);
3643 zone
->zsz_cpus_online
+= pset
->zsp_online
;
3645 pset
->zsp_total_time
= ts
;
3648 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
3649 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
3651 /* update cpu cap tracking if the zone has a cpu cap */
3652 if (zone
->zsz_cpu_cap
!= ZS_LIMIT_NONE
) {
3655 elapsed
= ctl
->zsctl_hrtime
- ctl
->zsctl_hrtime_prev
;
3656 elapsed
*= zone
->zsz_cpu_cap
;
3657 elapsed
= elapsed
/ 100;
3658 TIMESTRUC_ADD_NANOSEC(zone
->zsz_cap_time
, elapsed
);
3661 sys
= ctl
->zsctl_system
;
3664 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_intr
);
3665 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_kern
);
3666 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_user
);
3668 /* kernel time in pset is total time minus zone time */
3669 TIMESTRUC_DELTA(sys
->zss_cpu_usage_kern
, ts
,
3670 sys
->zss_cpu_usage_zones
);
3671 if (sys
->zss_cpu_usage_kern
.tv_sec
< 0 ||
3672 sys
->zss_cpu_usage_kern
.tv_nsec
< 0) {
3673 sys
->zss_cpu_usage_kern
.tv_sec
= 0;
3674 sys
->zss_cpu_usage_kern
.tv_nsec
= 0;
3676 /* Total pset elapsed time is used time plus idle time */
3677 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_idle
);
3678 sys
->zss_cpu_total_time
= ts
;
3682 * Saves current usage data to a cache that is read by libzonestat when
3683 * calling zs_usage_read().
3685 * All pointers in the cached data structure are set to NULL. When
3686 * libzonestat reads the cached data, it will set the pointers relative to
3687 * its address space.
3690 zsd_usage_cache_update(zsd_ctl_t
*ctl
)
3692 zs_usage_cache_t
*cache
;
3693 zs_usage_cache_t
*old
;
3698 zs_zone_t
*zone
= NULL
;
3700 zs_pset_t
*pset
= NULL
;
3702 zs_pset_zone_t
*pusage
;
3703 zsd_pset_usage_t
*dpusage
;
3709 sizeof (zs_usage_cache_t
) +
3710 sizeof (zs_usage_t
) +
3711 sizeof (zs_system_t
) +
3712 sizeof (zs_zone_t
) * ctl
->zsctl_nzones
+
3713 sizeof (zs_pset_t
) * ctl
->zsctl_npsets
+
3714 sizeof (zs_pset_zone_t
) * ctl
->zsctl_npset_usages
;
3716 cache
= (zs_usage_cache_t
*)malloc(size
);
3717 if (cache
== NULL
) {
3718 zsd_warn(gettext("Unable to allocate usage cache\n"));
3722 next
= (char *)cache
;
3723 cache
->zsuc_size
= size
- sizeof (zs_usage_cache_t
);
3724 next
+= sizeof (zs_usage_cache_t
);
3727 usage
= cache
->zsuc_usage
= (zs_usage_t
*)next
;
3728 next
+= sizeof (zs_usage_t
);
3729 usage
->zsu_start
= g_start
;
3730 usage
->zsu_hrstart
= g_hrstart
;
3731 usage
->zsu_time
= g_now
;
3732 usage
->zsu_hrtime
= g_hrnow
;
3733 usage
->zsu_nzones
= ctl
->zsctl_nzones
;
3734 usage
->zsu_npsets
= ctl
->zsctl_npsets
;
3735 usage
->zsu_system
= NULL
;
3738 sys
= (zs_system_t
*)next
;
3739 next
+= sizeof (zs_system_t
);
3740 dsys
= ctl
->zsctl_system
;
3741 sys
->zss_ram_total
= dsys
->zss_ram_total
;
3742 sys
->zss_ram_kern
= dsys
->zss_ram_kern
;
3743 sys
->zss_ram_zones
= dsys
->zss_ram_zones
;
3744 sys
->zss_locked_kern
= dsys
->zss_locked_kern
;
3745 sys
->zss_locked_zones
= dsys
->zss_locked_zones
;
3746 sys
->zss_vm_total
= dsys
->zss_vm_total
;
3747 sys
->zss_vm_kern
= dsys
->zss_vm_kern
;
3748 sys
->zss_vm_zones
= dsys
->zss_vm_zones
;
3749 sys
->zss_swap_total
= dsys
->zss_swap_total
;
3750 sys
->zss_swap_used
= dsys
->zss_swap_used
;
3751 sys
->zss_ncpus
= dsys
->zss_ncpus
;
3752 sys
->zss_ncpus_online
= dsys
->zss_ncpus_online
;
3754 sys
->zss_processes_max
= dsys
->zss_maxpid
;
3755 sys
->zss_lwps_max
= dsys
->zss_lwps_max
;
3756 sys
->zss_shm_max
= dsys
->zss_shm_max
;
3757 sys
->zss_shmids_max
= dsys
->zss_shmids_max
;
3758 sys
->zss_semids_max
= dsys
->zss_semids_max
;
3759 sys
->zss_msgids_max
= dsys
->zss_msgids_max
;
3760 sys
->zss_lofi_max
= dsys
->zss_lofi_max
;
3762 sys
->zss_processes
= dsys
->zss_processes
;
3763 sys
->zss_lwps
= dsys
->zss_lwps
;
3764 sys
->zss_shm
= dsys
->zss_shm
;
3765 sys
->zss_shmids
= dsys
->zss_shmids
;
3766 sys
->zss_semids
= dsys
->zss_semids
;
3767 sys
->zss_msgids
= dsys
->zss_msgids
;
3768 sys
->zss_lofi
= dsys
->zss_lofi
;
3770 sys
->zss_cpu_total_time
= dsys
->zss_cpu_total_time
;
3771 sys
->zss_cpu_usage_zones
= dsys
->zss_cpu_usage_zones
;
3772 sys
->zss_cpu_usage_kern
= dsys
->zss_cpu_usage_kern
;
3774 for (i
= 0, dzone
= list_head(&ctl
->zsctl_zones
);
3775 i
< ctl
->zsctl_nzones
;
3776 i
++, dzone
= list_next(&ctl
->zsctl_zones
, dzone
)) {
3778 zone
= (zs_zone_t
*)next
;
3779 next
+= sizeof (zs_zone_t
);
3780 list_link_init(&zone
->zsz_next
);
3781 zone
->zsz_system
= NULL
;
3783 (void) strlcpy(zone
->zsz_name
, dzone
->zsz_name
,
3784 sizeof (zone
->zsz_name
));
3785 (void) strlcpy(zone
->zsz_pool
, dzone
->zsz_pool
,
3786 sizeof (zone
->zsz_pool
));
3787 (void) strlcpy(zone
->zsz_pset
, dzone
->zsz_pset
,
3788 sizeof (zone
->zsz_pset
));
3789 zone
->zsz_id
= dzone
->zsz_id
;
3790 zone
->zsz_cputype
= dzone
->zsz_cputype
;
3791 zone
->zsz_iptype
= dzone
->zsz_iptype
;
3792 zone
->zsz_start
= dzone
->zsz_start
;
3793 zone
->zsz_hrstart
= dzone
->zsz_hrstart
;
3794 zone
->zsz_scheds
= dzone
->zsz_scheds
;
3795 zone
->zsz_cpu_shares
= dzone
->zsz_cpu_shares
;
3796 zone
->zsz_cpu_cap
= dzone
->zsz_cpu_cap
;
3797 zone
->zsz_ram_cap
= dzone
->zsz_ram_cap
;
3798 zone
->zsz_vm_cap
= dzone
->zsz_vm_cap
;
3799 zone
->zsz_locked_cap
= dzone
->zsz_locked_cap
;
3800 zone
->zsz_cpu_usage
= dzone
->zsz_cpu_usage
;
3801 zone
->zsz_cpus_online
= dzone
->zsz_cpus_online
;
3802 zone
->zsz_pset_time
= dzone
->zsz_pset_time
;
3803 zone
->zsz_cap_time
= dzone
->zsz_cap_time
;
3804 zone
->zsz_share_time
= dzone
->zsz_share_time
;
3805 zone
->zsz_usage_ram
= dzone
->zsz_usage_ram
;
3806 zone
->zsz_usage_locked
= dzone
->zsz_usage_locked
;
3807 zone
->zsz_usage_vm
= dzone
->zsz_usage_vm
;
3809 zone
->zsz_processes_cap
= dzone
->zsz_processes_cap
;
3810 zone
->zsz_lwps_cap
= dzone
->zsz_lwps_cap
;
3811 zone
->zsz_shm_cap
= dzone
->zsz_shm_cap
;
3812 zone
->zsz_shmids_cap
= dzone
->zsz_shmids_cap
;
3813 zone
->zsz_semids_cap
= dzone
->zsz_semids_cap
;
3814 zone
->zsz_msgids_cap
= dzone
->zsz_msgids_cap
;
3815 zone
->zsz_lofi_cap
= dzone
->zsz_lofi_cap
;
3817 zone
->zsz_processes
= dzone
->zsz_processes
;
3818 zone
->zsz_lwps
= dzone
->zsz_lwps
;
3819 zone
->zsz_shm
= dzone
->zsz_shm
;
3820 zone
->zsz_shmids
= dzone
->zsz_shmids
;
3821 zone
->zsz_semids
= dzone
->zsz_semids
;
3822 zone
->zsz_msgids
= dzone
->zsz_msgids
;
3823 zone
->zsz_lofi
= dzone
->zsz_lofi
;
3826 for (i
= 0, dpset
= list_head(&ctl
->zsctl_psets
);
3827 i
< ctl
->zsctl_npsets
;
3828 i
++, dpset
= list_next(&ctl
->zsctl_psets
, dpset
)) {
3830 pset
= (zs_pset_t
*)next
;
3831 next
+= sizeof (zs_pset_t
);
3832 list_link_init(&pset
->zsp_next
);
3833 (void) strlcpy(pset
->zsp_name
, dpset
->zsp_name
,
3834 sizeof (pset
->zsp_name
));
3835 pset
->zsp_id
= dpset
->zsp_id
;
3836 pset
->zsp_cputype
= dpset
->zsp_cputype
;
3837 pset
->zsp_start
= dpset
->zsp_start
;
3838 pset
->zsp_hrstart
= dpset
->zsp_hrstart
;
3839 pset
->zsp_online
= dpset
->zsp_online
;
3840 pset
->zsp_size
= dpset
->zsp_size
;
3841 pset
->zsp_min
= dpset
->zsp_min
;
3842 pset
->zsp_max
= dpset
->zsp_max
;
3843 pset
->zsp_importance
= dpset
->zsp_importance
;
3844 pset
->zsp_scheds
= dpset
->zsp_scheds
;
3845 pset
->zsp_cpu_shares
= dpset
->zsp_cpu_shares
;
3846 pset
->zsp_total_time
= dpset
->zsp_total_time
;
3847 pset
->zsp_usage_kern
= dpset
->zsp_usage_kern
;
3848 pset
->zsp_usage_zones
= dpset
->zsp_usage_zones
;
3849 pset
->zsp_nusage
= dpset
->zsp_nusage
;
3850 /* Add pset usages for pset */
3851 for (j
= 0, dpusage
= list_head(&dpset
->zsp_usage_list
);
3852 j
< dpset
->zsp_nusage
;
3853 j
++, dpusage
= list_next(&dpset
->zsp_usage_list
, dpusage
)) {
3855 pusage
= (zs_pset_zone_t
*)next
;
3856 next
+= sizeof (zs_pset_zone_t
);
3857 /* pointers are computed by client */
3858 pusage
->zspz_pset
= NULL
;
3859 pusage
->zspz_zone
= NULL
;
3860 list_link_init(&pusage
->zspz_next
);
3861 pusage
->zspz_zoneid
= dpusage
->zsu_zone
->zsz_id
;
3862 pusage
->zspz_start
= dpusage
->zsu_start
;
3863 pusage
->zspz_hrstart
= dpusage
->zsu_hrstart
;
3864 pusage
->zspz_hrstart
= dpusage
->zsu_hrstart
;
3865 pusage
->zspz_cpu_shares
= dpusage
->zsu_cpu_shares
;
3866 pusage
->zspz_scheds
= dpusage
->zsu_scheds
;
3867 pusage
->zspz_cpu_usage
= dpusage
->zsu_cpu_usage
;
3871 /* Update the current cache pointer */
3872 (void) mutex_lock(&g_usage_cache_lock
);
3873 old
= g_usage_cache
;
3874 cache
->zsuc_ref
= 1;
3875 cache
->zsuc_gen
= g_gen_next
;
3876 usage
->zsu_gen
= g_gen_next
;
3877 usage
->zsu_size
= size
;
3878 g_usage_cache
= cache
;
3881 if (old
->zsuc_ref
== 0)
3885 /* Wake up any clients that are waiting for this calculation */
3886 if (g_usage_cache_kickers
> 0) {
3887 (void) cond_broadcast(&g_usage_cache_wait
);
3889 (void) mutex_unlock(&g_usage_cache_lock
);
3892 static zs_usage_cache_t
*
3893 zsd_usage_cache_hold_locked()
3895 zs_usage_cache_t
*ret
;
3897 ret
= g_usage_cache
;
3903 zsd_usage_cache_rele(zs_usage_cache_t
*cache
)
3905 (void) mutex_lock(&g_usage_cache_lock
);
3907 if (cache
->zsuc_ref
== 0)
3909 (void) mutex_unlock(&g_usage_cache_lock
);
3912 /* Close the handles held by zsd_open() */
3914 zsd_close(zsd_ctl_t
*ctl
)
3918 zsd_pset_usage_t
*usage
;
3922 if (ctl
->zsctl_kstat_ctl
) {
3923 (void) kstat_close(ctl
->zsctl_kstat_ctl
);
3924 ctl
->zsctl_kstat_ctl
= NULL
;
3926 if (ctl
->zsctl_proc_open
) {
3927 (void) ea_close(&ctl
->zsctl_proc_eaf
);
3928 ctl
->zsctl_proc_open
= 0;
3929 ctl
->zsctl_proc_fd
= -1;
3931 if (ctl
->zsctl_pool_conf
) {
3932 if (ctl
->zsctl_pool_status
== POOL_ENABLED
)
3933 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
3934 ctl
->zsctl_pool_status
= POOL_DISABLED
;
3937 while ((zone
= list_head(&ctl
->zsctl_zones
)) != NULL
) {
3938 list_remove(&ctl
->zsctl_zones
, zone
);
3940 ctl
->zsctl_nzones
--;
3943 while ((pset
= list_head(&ctl
->zsctl_psets
)) != NULL
) {
3944 while ((usage
= list_head(&pset
->zsp_usage_list
))
3946 list_remove(&pset
->zsp_usage_list
, usage
);
3947 ctl
->zsctl_npset_usages
--;
3950 list_remove(&ctl
->zsctl_psets
, pset
);
3952 ctl
->zsctl_npsets
--;
3955 /* Release all cpus being tracked */
3956 while (cpu
= list_head(&ctl
->zsctl_cpus
)) {
3957 list_remove(&ctl
->zsctl_cpus
, cpu
);
3959 bzero(cpu
, sizeof (zsd_cpu_t
));
3961 cpu
->zsc_allocated
= B_FALSE
;
3962 cpu
->zsc_psetid
= ZS_PSET_ERROR
;
3963 cpu
->zsc_psetid_prev
= ZS_PSET_ERROR
;
3966 assert(ctl
->zsctl_npset_usages
== 0);
3967 assert(ctl
->zsctl_npsets
== 0);
3968 assert(ctl
->zsctl_nzones
== 0);
3969 (void) zsd_disable_cpu_stats();
3974 * Update the utilization data for all zones and processor sets.
3977 zsd_read(zsd_ctl_t
*ctl
, boolean_t init
, boolean_t do_memory
)
3979 (void) kstat_chain_update(ctl
->zsctl_kstat_ctl
);
3980 (void) gettimeofday(&(ctl
->zsctl_timeofday
), NULL
);
3982 zsd_refresh_system(ctl
);
3985 * Memory calculation is expensive. Only update it on sample
3988 if (do_memory
== B_TRUE
)
3989 zsd_refresh_memory(ctl
, init
);
3990 zsd_refresh_zones(ctl
);
3991 zsd_refresh_psets(ctl
);
3992 zsd_refresh_procs(ctl
, init
);
3993 zsd_refresh_cpu_stats(ctl
, init
);
3996 * Delete objects that no longer exist.
3997 * Pset usages must be deleted first as they point to zone and
4000 zsd_mark_pset_usages_end(ctl
);
4001 zsd_mark_psets_end(ctl
);
4002 zsd_mark_cpus_end(ctl
);
4003 zsd_mark_zones_end(ctl
);
4006 * Save results for clients.
4008 zsd_usage_cache_update(ctl
);
4011 * Roll process accounting file.
4013 (void) zsd_roll_exacct();
4018 * Get the system rctl, which is the upper most limit
4021 zsd_get_system_rctl(char *name
)
4023 rctlblk_t
*rblk
, *rblk_last
;
4025 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
4026 rblk_last
= (rctlblk_t
*)alloca(rctlblk_size());
4028 if (getrctl(name
, NULL
, rblk_last
, RCTL_FIRST
) != 0)
4029 return (ZS_LIMIT_NONE
);
4031 while (getrctl(name
, rblk_last
, rblk
, RCTL_NEXT
) == 0)
4032 (void) bcopy(rblk
, rblk_last
, rctlblk_size());
4034 return (rctlblk_get_value(rblk_last
));
4038 * Open any necessary subsystems for collecting utilization data,
4039 * allocate and initialize data structures, and get initial utilization.
4042 * ENOMEM out of memory
4043 * EINVAL other error
4046 zsd_open(zsd_ctl_t
*ctl
)
4048 zsd_system_t
*system
;
4050 char path
[MAXPATHLEN
];
4052 struct statvfs svfs
;
4058 if (ctl
== NULL
&& (ctl
= (zsd_ctl_t
*)calloc(1,
4059 sizeof (zsd_ctl_t
))) == NULL
) {
4060 zsd_warn(gettext("Out of Memory"));
4064 ctl
->zsctl_proc_fd
= -1;
4067 if (ctl
->zsctl_kstat_ctl
== NULL
&&
4068 (ctl
->zsctl_kstat_ctl
= kstat_open()) == NULL
) {
4070 zsd_warn(gettext("Unable to open kstats"));
4072 if (errno
!= ENOMEM
)
4078 * These are set when the accounting file is opened by
4079 * zsd_update_procs()
4081 ctl
->zsctl_proc_fd
= -1;
4082 ctl
->zsctl_proc_fd_next
= -1;
4083 ctl
->zsctl_proc_open
= 0;
4084 ctl
->zsctl_proc_open_next
= 0;
4087 (void) zsd_enable_cpu_stats();
4089 /* Create structures to track usage */
4090 if (ctl
->zsctl_system
== NULL
&& (ctl
->zsctl_system
= (zsd_system_t
*)
4091 calloc(1, sizeof (zsd_system_t
))) == NULL
) {
4093 zsd_warn(gettext("Out of Memory"));
4097 system
= ctl
->zsctl_system
;
4098 /* get the kernel bitness to know structure layout for getvmusage */
4099 ret
= sysinfo(SI_ARCHITECTURE_64
, path
, sizeof (path
));
4101 ctl
->zsctl_kern_bits
= 32;
4103 ctl
->zsctl_kern_bits
= 64;
4104 ctl
->zsctl_pagesize
= sysconf(_SC_PAGESIZE
);
4106 size
= sysconf(_SC_CPUID_MAX
);
4107 ctl
->zsctl_maxcpuid
= size
;
4108 if (ctl
->zsctl_cpu_array
== NULL
&& (ctl
->zsctl_cpu_array
=
4109 (zsd_cpu_t
*)calloc(size
+ 1, sizeof (zsd_cpu_t
))) == NULL
) {
4110 zsd_warn(gettext("Out of Memory"));
4114 for (i
= 0; i
<= ctl
->zsctl_maxcpuid
; i
++) {
4115 ctl
->zsctl_cpu_array
[i
].zsc_id
= i
;
4116 ctl
->zsctl_cpu_array
[i
].zsc_allocated
= B_FALSE
;
4117 ctl
->zsctl_cpu_array
[i
].zsc_psetid
= ZS_PSET_ERROR
;
4118 ctl
->zsctl_cpu_array
[i
].zsc_psetid_prev
= ZS_PSET_ERROR
;
4120 if (statvfs("/proc", &svfs
) != 0 ||
4121 strcmp("/proc", svfs
.f_fstr
) != 0) {
4122 zsd_warn(gettext("/proc not a procfs filesystem"));
4127 size
= sysconf(_SC_MAXPID
) + 1;
4128 ctl
->zsctl_maxproc
= size
;
4129 if (ctl
->zsctl_proc_array
== NULL
&&
4130 (ctl
->zsctl_proc_array
= (zsd_proc_t
*)calloc(size
,
4131 sizeof (zsd_proc_t
))) == NULL
) {
4132 zsd_warn(gettext("Out of Memory"));
4136 for (i
= 0; i
<= ctl
->zsctl_maxproc
; i
++) {
4137 list_link_init(&(ctl
->zsctl_proc_array
[i
].zspr_next
));
4138 ctl
->zsctl_proc_array
[i
].zspr_psetid
= ZS_PSET_ERROR
;
4139 ctl
->zsctl_proc_array
[i
].zspr_zoneid
= -1;
4140 ctl
->zsctl_proc_array
[i
].zspr_usage
.tv_sec
= 0;
4141 ctl
->zsctl_proc_array
[i
].zspr_usage
.tv_nsec
= 0;
4142 ctl
->zsctl_proc_array
[i
].zspr_ppid
= -1;
4145 list_create(&ctl
->zsctl_zones
, sizeof (zsd_zone_t
),
4146 offsetof(zsd_zone_t
, zsz_next
));
4148 list_create(&ctl
->zsctl_psets
, sizeof (zsd_pset_t
),
4149 offsetof(zsd_pset_t
, zsp_next
));
4151 list_create(&ctl
->zsctl_cpus
, sizeof (zsd_cpu_t
),
4152 offsetof(zsd_cpu_t
, zsc_next
));
4154 pathmax
= pathconf("/proc", _PC_NAME_MAX
);
4156 zsd_warn(gettext("Unable to determine max path of /proc"));
4160 size
= sizeof (struct dirent
) + pathmax
+ 1;
4162 ctl
->zsctl_procfs_dent_size
= size
;
4163 if (ctl
->zsctl_procfs_dent
== NULL
&&
4164 (ctl
->zsctl_procfs_dent
= (struct dirent
*)calloc(1, size
))
4166 zsd_warn(gettext("Out of Memory"));
4171 if (ctl
->zsctl_pool_conf
== NULL
&&
4172 (ctl
->zsctl_pool_conf
= pool_conf_alloc()) == NULL
) {
4173 zsd_warn(gettext("Out of Memory"));
4177 ctl
->zsctl_pool_status
= POOL_DISABLED
;
4178 ctl
->zsctl_pool_changed
= 0;
4180 if (ctl
->zsctl_pool_vals
[0] == NULL
&&
4181 (ctl
->zsctl_pool_vals
[0] = pool_value_alloc()) == NULL
) {
4182 zsd_warn(gettext("Out of Memory"));
4186 if (ctl
->zsctl_pool_vals
[1] == NULL
&&
4187 (ctl
->zsctl_pool_vals
[1] = pool_value_alloc()) == NULL
) {
4188 zsd_warn(gettext("Out of Memory"));
4192 ctl
->zsctl_pool_vals
[2] = NULL
;
4197 system
->zss_maxpid
= size
= sysconf(_SC_MAXPID
);
4198 system
->zss_processes_max
= zsd_get_system_rctl("zone.max-processes");
4199 system
->zss_lwps_max
= zsd_get_system_rctl("zone.max-lwps");
4200 system
->zss_shm_max
= zsd_get_system_rctl("zone.max-shm-memory");
4201 system
->zss_shmids_max
= zsd_get_system_rctl("zone.max-shm-ids");
4202 system
->zss_semids_max
= zsd_get_system_rctl("zone.max-sem-ids");
4203 system
->zss_msgids_max
= zsd_get_system_rctl("zone.max-msg-ids");
4204 system
->zss_lofi_max
= zsd_get_system_rctl("zone.max-lofi");
4208 if (zsd_read(ctl
, B_TRUE
, B_FALSE
) != 0)
4209 zsd_warn(gettext("Reading zone statistics failed"));
4219 /* Copy utilization data to buffer, filtering data if non-global zone. */
4221 zsd_usage_filter(zoneid_t zid
, zs_usage_cache_t
*cache
, zs_usage_t
*usage
,
4225 zs_system_t
*sys
, *csys
;
4226 zs_zone_t
*zone
, *czone
;
4227 zs_pset_t
*pset
, *cpset
;
4228 zs_pset_zone_t
*pz
, *cpz
, *foundpz
;
4229 size_t size
= 0, csize
= 0;
4230 char *start
, *cstart
;
4234 /* Privileged users in the global zone get everything */
4236 cusage
= cache
->zsuc_usage
;
4237 (void) bcopy(cusage
, usage
, cusage
->zsu_size
);
4241 /* Zones just get their own usage */
4242 cusage
= cache
->zsuc_usage
;
4244 start
= (char *)usage
;
4245 cstart
= (char *)cusage
;
4246 size
+= sizeof (zs_usage_t
);
4247 csize
+= sizeof (zs_usage_t
);
4249 usage
->zsu_start
= cusage
->zsu_start
;
4250 usage
->zsu_hrstart
= cusage
->zsu_hrstart
;
4251 usage
->zsu_time
= cusage
->zsu_time
;
4252 usage
->zsu_hrtime
= cusage
->zsu_hrtime
;
4253 usage
->zsu_gen
= cusage
->zsu_gen
;
4254 usage
->zsu_nzones
= 1;
4255 usage
->zsu_npsets
= 0;
4258 sys
= (zs_system_t
*)(start
+ size
);
4260 csys
= (zs_system_t
*)(cstart
+ csize
);
4261 size
+= sizeof (zs_system_t
);
4262 csize
+= sizeof (zs_system_t
);
4264 /* Save system limits but not usage */
4267 sys
->zss_ncpus_online
= 0;
4270 zone
= (zs_zone_t
*)(start
+ size
);
4272 czone
= (zs_zone_t
*)(cstart
+ csize
);
4273 /* Find the matching zone */
4274 for (i
= 0; i
< cusage
->zsu_nzones
; i
++) {
4275 if (czone
->zsz_id
== zid
) {
4277 size
+= sizeof (zs_zone_t
);
4279 csize
+= sizeof (zs_zone_t
);
4281 czone
= (zs_zone_t
*)(cstart
+ csize
);
4283 sys
->zss_ram_kern
+= (sys
->zss_ram_zones
- zone
->zsz_usage_ram
);
4284 sys
->zss_ram_zones
= zone
->zsz_usage_ram
;
4286 sys
->zss_vm_kern
+= (sys
->zss_vm_zones
- zone
->zsz_usage_vm
);
4287 sys
->zss_vm_zones
= zone
->zsz_usage_vm
;
4289 sys
->zss_locked_kern
+= (sys
->zss_locked_zones
-
4290 zone
->zsz_usage_locked
);
4291 sys
->zss_locked_zones
= zone
->zsz_usage_locked
;
4293 TIMESTRUC_DELTA(delta
, sys
->zss_cpu_usage_zones
, zone
->zsz_cpu_usage
);
4294 TIMESTRUC_ADD_TIMESTRUC(sys
->zss_cpu_usage_kern
, delta
);
4295 sys
->zss_cpu_usage_zones
= zone
->zsz_cpu_usage
;
4298 pset
= (zs_pset_t
*)(start
+ size
);
4300 cpset
= (zs_pset_t
*)(cstart
+ csize
);
4301 for (i
= 0; i
< cusage
->zsu_npsets
; i
++) {
4302 csize
+= sizeof (zs_pset_t
);
4304 cpz
= (zs_pset_zone_t
*)(csize
+ cstart
);
4306 for (j
= 0; j
< cpset
->zsp_nusage
; j
++) {
4307 if (cpz
->zspz_zoneid
== zid
)
4310 csize
+= sizeof (zs_pset_zone_t
);
4312 cpz
= (zs_pset_zone_t
*)(csize
+ cstart
);
4314 if (foundpz
!= NULL
) {
4315 size
+= sizeof (zs_pset_t
);
4317 pz
= (zs_pset_zone_t
*)(start
+ size
);
4318 size
+= sizeof (zs_pset_zone_t
);
4323 TIMESTRUC_DELTA(delta
, pset
->zsp_usage_zones
,
4324 pz
->zspz_cpu_usage
);
4325 TIMESTRUC_ADD_TIMESTRUC(pset
->zsp_usage_kern
, delta
);
4326 pset
->zsp_usage_zones
= pz
->zspz_cpu_usage
;
4327 pset
->zsp_nusage
= 1;
4328 usage
->zsu_npsets
++;
4329 sys
->zss_ncpus
+= pset
->zsp_size
;
4330 sys
->zss_ncpus_online
+= pset
->zsp_online
;
4333 cpset
= (zs_pset_t
*)(cstart
+ csize
);
4335 usage
->zsu_size
= size
;
4339 * Respond to new connections from libzonestat.so. Also respond to zoneadmd,
4340 * which reports new zones.
4344 zsd_server(void *cookie
, char *argp
, size_t arg_size
,
4345 door_desc_t
*dp
, uint_t n_desc
)
4350 const priv_set_t
*eset
;
4352 if (argp
== DOOR_UNREF_DATA
) {
4353 (void) door_return(NULL
, 0, NULL
, 0);
4357 if (arg_size
!= sizeof (cmd
) * 2) {
4358 (void) door_return(NULL
, 0, NULL
, 0);
4366 /* If connection, return door to stat server */
4367 if (cmd
== ZSD_CMD_CONNECT
) {
4369 /* Verify client compilation version */
4370 if (args
[1] != ZS_VERSION
) {
4371 args
[1] = ZSD_STATUS_VERSION_MISMATCH
;
4372 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4375 ucred
= alloca(ucred_size());
4376 /* Verify client permission */
4377 if (door_ucred(&ucred
) != 0) {
4378 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4379 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4383 eset
= ucred_getprivset(ucred
, PRIV_EFFECTIVE
);
4385 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4386 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4389 if (!priv_ismember(eset
, PRIV_PROC_INFO
)) {
4390 args
[1] = ZSD_STATUS_PERMISSION
;
4391 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4395 /* Return stat server door */
4396 args
[1] = ZSD_STATUS_OK
;
4397 door
.d_attributes
= DOOR_DESCRIPTOR
;
4398 door
.d_data
.d_desc
.d_descriptor
= g_stat_door
;
4399 (void) door_return(argp
, sizeof (cmd
) * 2, &door
, 1);
4403 /* Respond to zoneadmd informing zonestatd of a new zone */
4404 if (cmd
== ZSD_CMD_NEW_ZONE
) {
4405 zsd_fattach_zone(args
[1], g_server_door
, B_FALSE
);
4406 (void) door_return(NULL
, 0, NULL
, 0);
4410 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4411 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4416  * Respond to libzonestat.so clients with the current utilization data.
4420 zsd_stat_server(void *cookie
, char *argp
, size_t arg_size
,
4421 door_desc_t
*dp
, uint_t n_desc
)
4423 uint64_t *args
, cmd
;
4424 zs_usage_cache_t
*cache
;
4431 const priv_set_t
*eset
;
4432 boolean_t is_gz
= B_FALSE
;
4434 /* Tell stat thread there are no more clients */
4435 if (argp
== DOOR_UNREF_DATA
) {
4436 (void) mutex_lock(&g_usage_cache_lock
);
4437 g_hasclient
= B_FALSE
;
4438 (void) cond_signal(&g_usage_cache_kick
);
4439 (void) mutex_unlock(&g_usage_cache_lock
);
4440 (void) door_return(NULL
, 0, NULL
, 0);
4443 if (arg_size
!= sizeof (cmd
) * 2) {
4444 (void) door_return(NULL
, 0, NULL
, 0);
4448 args
= (uint64_t *)argp
;
4450 if (cmd
!= ZSD_CMD_READ
) {
4451 (void) door_return(NULL
, 0, NULL
, 0);
4454 ucred
= alloca(ucred_size());
4455 if (door_ucred(&ucred
) != 0) {
4456 (void) door_return(NULL
, 0, NULL
, 0);
4459 zoneid
= ucred_getzoneid(ucred
);
4461 if (zoneid
== GLOBAL_ZONEID
)
4464 eset
= ucred_getprivset(ucred
, PRIV_EFFECTIVE
);
4466 (void) door_return(NULL
, 0, NULL
, 0);
4469 if (!priv_ismember(eset
, PRIV_PROC_INFO
)) {
4470 (void) door_return(NULL
, 0, NULL
, 0);
4473 (void) mutex_lock(&g_usage_cache_lock
);
4474 g_hasclient
= B_TRUE
;
4477 * Force a new cpu calculation for client. This will force a
4478 * new memory calculation if the memory data is older than the
4481 g_usage_cache_kickers
++;
4482 (void) cond_signal(&g_usage_cache_kick
);
4483 ret
= cond_wait(&g_usage_cache_wait
, &g_usage_cache_lock
);
4484 g_usage_cache_kickers
--;
4485 if (ret
!= 0 && errno
== EINTR
) {
4486 (void) mutex_unlock(&g_usage_cache_lock
);
4488 "Interrupted before writing usage size to client\n"));
4489 (void) door_return(NULL
, 0, NULL
, 0);
4492 cache
= zsd_usage_cache_hold_locked();
4493 if (cache
== NULL
) {
4494 zsd_warn(gettext("Usage cache empty.\n"));
4495 (void) door_return(NULL
, 0, NULL
, 0);
4498 (void) mutex_unlock(&g_usage_cache_lock
);
4500 /* Copy current usage data to stack to send to client */
4501 usage
= (zs_usage_t
*)alloca(cache
->zsuc_size
);
4503 /* Filter out results if caller is non-global zone */
4504 zsd_usage_filter(zoneid
, cache
, usage
, is_gz
);
4506 rvalp
= (void *)usage
;
4507 rvals
= usage
->zsu_size
;
4508 zsd_usage_cache_rele(cache
);
4510 (void) door_return(rvalp
, rvals
, 0, NULL
);
4514 static volatile boolean_t g_quit
;
4518 zonestat_quithandler(int sig
)
4524 * The stat thread generates new utilization data when clients request
4525 * it. It also manages opening and closing the subsystems used to gather
4526 * data depending on if clients exist.
4530 stat_thread(void *arg
)
4535 boolean_t do_memory
;
4541 if (g_quit
== B_TRUE
)
4543 zsd_warn(gettext("Unable to fetch current time"));
4548 next_memory
= start
;
4549 while (g_quit
== B_FALSE
) {
4552 * These are used to decide if the most recent memory
4553 * calculation was within a sample interval,
4554  * and whether or not the usage collection needs to
4555 * be opened or closed.
4557 do_memory
= B_FALSE
;
4562 * If all clients have gone, close usage collecting
4564 (void) mutex_lock(&g_usage_cache_lock
);
4565 if (!g_hasclient
&& g_open
== B_TRUE
) {
4567 (void) mutex_unlock(&g_usage_cache_lock
);
4570 if (g_quit
== B_TRUE
) {
4571 (void) mutex_unlock(
4572 &g_usage_cache_lock
);
4576 * Wait for a usage data request
4578 if (g_usage_cache_kickers
== 0) {
4579 (void) cond_wait(&g_usage_cache_kick
,
4580 &g_usage_cache_lock
);
4584 if (g_quit
== B_TRUE
) {
4585 (void) mutex_unlock(
4586 &g_usage_cache_lock
);
4590 (void) mutex_unlock(&g_usage_cache_lock
);
4592 "Unable to fetch current time"));
4597 if (now
>= next_memory
) {
4599 next_memory
= now
+ g_interval
;
4604 (void) mutex_unlock(&g_usage_cache_lock
);
4605 if (do_read
|| do_close
)
4609 g_hrnow
= gethrtime();
4610 if (g_hasclient
&& g_open
== B_FALSE
) {
4612 g_hrstart
= g_hrnow
;
4613 g_ctl
= zsd_open(g_ctl
);
4616 "Unable to open zone statistics"));
4620 if (do_read
&& g_ctl
) {
4621 if (zsd_read(g_ctl
, B_FALSE
, do_memory
) != 0) {
4623 "Unable to read zone statistics"));
4628 (void) mutex_lock(&g_usage_cache_lock
);
4629 if (!g_hasclient
&& g_open
== B_TRUE
&& g_ctl
) {
4630 (void) mutex_unlock(&g_usage_cache_lock
);
4634 (void) mutex_unlock(&g_usage_cache_lock
);
4641 (void) thr_kill(g_main
, SIGINT
);
4652 (void) strlcpy(pcinfo
.pc_clname
, "FX", sizeof (pcinfo
.pc_clname
));
4653 if (priocntl(0, 0, PC_GETCID
, (caddr_t
)&pcinfo
) == -1) {
4654 zsd_warn(gettext("cannot get FX class parameters"));
4657 pcparms
.pc_cid
= pcinfo
.pc_cid
;
4658 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_upri
= 60;
4659 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_uprilim
= 60;
4660 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_tqsecs
= 0;
4661 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_tqnsecs
= FX_NOCHANGE
;
4662 if (priocntl(P_PID
, getpid(), PC_SETPARMS
, (caddr_t
)&pcparms
) == -1)
4663 zsd_warn(gettext("cannot enter the FX class"));
4669 daemonize_ready(char status
)
4672 * wake the parent with a clue
4674 (void) write(pipe_fd
, &status
, 1);
4675 (void) close(pipe_fd
);
4679 daemonize_start(void)
4690 if (pipe(filedes
) < 0)
4693 (void) fflush(NULL
);
4695 if ((pid
= fork1()) < 0)
4702 struct sigaction act
;
4704 act
.sa_sigaction
= SIG_DFL
;
4705 (void) sigemptyset(&act
.sa_mask
);
4708 (void) sigaction(SIGPIPE
, &act
, NULL
); /* ignore SIGPIPE */
4710 (void) close(filedes
[1]);
4711 if (read(filedes
[0], &data
, 1) == 1) {
4712 /* forward ready code via exit status */
4716 (void) wait4(pid
, &status
, 0, NULL
);
4717 /* daemon process exited before becoming ready */
4718 if (WIFEXITED(status
)) {
4719 /* assume daemon process printed useful message */
4720 exit(WEXITSTATUS(status
));
4722 zsd_warn(gettext("daemon process killed or died"));
4730 pipe_fd
= filedes
[1];
4731 (void) close(filedes
[0]);
4734 * generic Unix setup
4743 fattach_all_zones(boolean_t detach_only
)
4746 uint_t nzids
, nzids_last
;
4750 (void) zone_list(NULL
, &nzids
);
4752 zids
= (zoneid_t
*)malloc(sizeof (zoneid_t
) * nzids_last
);
4754 zsd_error(gettext("Out of memory"));
4756 (void) zone_list(zids
, &nzids
);
4757 if (nzids
> nzids_last
) {
4761 for (i
= 0; i
< nzids
; i
++)
4762 zsd_fattach_zone(zids
[i
], g_server_door
, detach_only
);
4768 main(int argc
, char *argv
[])
4773 scf_simple_prop_t
*prop
;
4774 uint64_t *intervalp
;
4775 boolean_t opt_cleanup
= B_FALSE
;
4777 g_main
= thr_self();
4779 (void) signal(SIGINT
, zonestat_quithandler
);
4780 (void) signal(SIGTERM
, zonestat_quithandler
);
4781 (void) signal(SIGHUP
, zonestat_quithandler
);
4782 /* (void) sigignore(SIGCHLD); */
4783 (void) sigignore(SIGPIPE
);
4785 if (getzoneid() != GLOBAL_ZONEID
)
4786 zsd_error(gettext("Must be run from global zone only"));
4788 while ((arg
= getopt(argc
, argv
, "c"))
4792 opt_cleanup
= B_TRUE
;
4795 zsd_error(gettext("Invalid option"));
4800 if (zsd_disable_cpu_stats() != 0)
4806 /* Get the configured sample interval */
4807 prop
= scf_simple_prop_get(NULL
, "svc:/system/zones-monitoring:default",
4808 "config", "sample_interval");
4810 zsd_error(gettext("Unable to fetch SMF property "
4811 "\"config/sample_interval\""));
4813 if (scf_simple_prop_type(prop
) != SCF_TYPE_COUNT
)
4814 zsd_error(gettext("Malformed SMF property "
4815 "\"config/sample_interval\". Must be of type \"count\""));
4817 intervalp
= scf_simple_prop_next_count(prop
);
4818 g_interval
= *intervalp
;
4819 if (g_interval
== 0)
4820 zsd_error(gettext("Malformed SMF property "
4821 "\"config/sample_interval\". Must be greater than zero"));
4823 scf_simple_prop_free(prop
);
4825 if (daemonize_start() < 0)
4826 zsd_error(gettext("Unable to start daemon\n"));
4828 /* Run at high priority */
4831 (void) mutex_init(&g_usage_cache_lock
, USYNC_THREAD
, NULL
);
4832 (void) cond_init(&g_usage_cache_kick
, USYNC_THREAD
, NULL
);
4833 (void) cond_init(&g_usage_cache_wait
, USYNC_THREAD
, NULL
);
4835 g_server_door
= door_create(zsd_server
, NULL
,
4836 DOOR_REFUSE_DESC
| DOOR_NO_CANCEL
);
4837 if (g_server_door
< 0)
4838 zsd_error(gettext("Unable to create server door\n"));
4841 g_stat_door
= door_create(zsd_stat_server
, NULL
, DOOR_UNREF_MULTI
|
4842 DOOR_REFUSE_DESC
| DOOR_NO_CANCEL
);
4843 if (g_stat_door
< 0)
4844 zsd_error(gettext("Unable to create statistics door\n"));
4846 fattach_all_zones(B_FALSE
);
4848 if (thr_create(NULL
, 0, stat_thread
, NULL
, 0, &tid
) != 0)
4849 zsd_error(gettext("Unable to create statistics thread\n"));
4853 /* Wait for signal to quit */
4854 while (g_quit
== B_FALSE
)
4858 fattach_all_zones(B_TRUE
);
4860 (void) door_revoke(g_server_door
);
4861 (void) door_revoke(g_stat_door
);
4863 /* kick stat thread and wait for it to close the statistics */
4864 (void) mutex_lock(&g_usage_cache_lock
);
4866 (void) cond_signal(&g_usage_cache_kick
);
4867 (void) mutex_unlock(&g_usage_cache_lock
);
4869 (void) thr_join(tid
, NULL
, NULL
);