4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
24 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
36 #include <libcontract.h>
40 #include <zonestat_impl.h>
54 #include <sys/acctctl.h>
55 #include <sys/contract/process.h>
58 #include <sys/param.h>
59 #include <sys/priocntl.h>
60 #include <sys/fxpriocntl.h>
61 #include <sys/processor.h>
63 #include <sys/socket.h>
65 #include <sys/statvfs.h>
67 #include <sys/systeminfo.h>
71 #include <sys/types.h>
72 #include <sys/vm_usage.h>
82 #define MAX_PSET_NAME 1024 /* Taken from PV_NAME_MAX_LEN */
83 #define ZSD_PSET_UNLIMITED UINT16_MAX
84 #define ZONESTAT_EXACCT_FILE "/var/adm/exacct/zonestat-process"
87 * zonestatd implements gathering cpu and memory utilization data for
88 * running zones. It has these components:
91 * Door server to respond to client connections. Each client
92 * will connect using libzonestat.so, which will open and
93 * call /var/tmp/.zonestat_door. Each connecting client is given
94 * a file descriptor to the stat server.
96 * The zsd_server also responds to zoneadmd, which reports when a
97 * new zone is booted. This is used to fattach the zsd_server door
101 * Receives client requests for the current utilization data. Each
102 * client request will cause zonestatd to update the current utilization
103 * data by kicking the stat_thread.
105 * If the client is in a non-global zone, the utilization data will
106 * be filtered to only show the given zone. The usage by all other zones
107 * will be added to the system utilization.
110 * The stat thread implements querying the system to determine the
111 * current utilization data for each running zone. This includes
112 * inspecting the system's processor set configuration, as well as details
113 * of each zone, such as their configured limits, and which processor
114 * sets they are running in.
116 * The stat_thread will only update memory utilization data as often as
117 * the configured config/sample_interval on the zones-monitoring service.
121 * The private vmusage structure unfortunately uses size_t types, and assumes
122 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
123 * system call is contracted, and zonestatd is 32 bit, the following structures
124 * are used to interact with a 32bit or 64 bit kernel.
126 typedef struct zsd_vmusage32
{
131 uint32_t vmu_rss_all
;
132 uint32_t vmu_rss_private
;
133 uint32_t vmu_rss_shared
;
134 uint32_t vmu_swap_all
;
135 uint32_t vmu_swap_private
;
136 uint32_t vmu_swap_shared
;
139 typedef struct zsd_vmusage64
{
144 * An amd64 kernel will align the following uint64_t members, but a
145 * 32bit i386 process will not without help.
147 int vmu_align_next_members_on_8_bytes
;
148 uint64_t vmu_rss_all
;
149 uint64_t vmu_rss_private
;
150 uint64_t vmu_rss_shared
;
151 uint64_t vmu_swap_all
;
152 uint64_t vmu_swap_private
;
153 uint64_t vmu_swap_shared
;
158 /* Used to store a zone's usage of a pset */
159 typedef struct zsd_pset_usage
{
160 struct zsd_zone
*zsu_zone
;
161 struct zsd_pset
*zsu_pset
;
163 list_node_t zsu_next
;
166 boolean_t zsu_found
; /* zone bound at end of interval */
167 boolean_t zsu_active
; /* zone was bound during interval */
168 boolean_t zsu_new
; /* zone newly bound in this interval */
169 boolean_t zsu_deleted
; /* zone was unbound in this interval */
170 boolean_t zsu_empty
; /* no procs in pset in this interval */
171 time_t zsu_start
; /* time when zone was found in pset */
172 hrtime_t zsu_hrstart
; /* time when zone was found in pset */
173 uint64_t zsu_cpu_shares
;
174 uint_t zsu_scheds
; /* schedulers found in this pass */
175 timestruc_t zsu_cpu_usage
; /* cpu time used */
178 /* Used to store a pset's utilization */
179 typedef struct zsd_pset
{
181 list_node_t zsp_next
;
182 char zsp_name
[ZS_PSETNAME_MAX
];
184 uint_t zsp_cputype
; /* default, dedicated or shared */
185 boolean_t zsp_found
; /* pset found at end of interval */
186 boolean_t zsp_new
; /* pset new in this interval */
187 boolean_t zsp_deleted
; /* pset deleted in this interval */
188 boolean_t zsp_active
; /* pset existed during interval */
189 boolean_t zsp_empty
; /* no processes in pset */
191 hrtime_t zsp_hrstart
;
193 uint64_t zsp_online
; /* online cpus in interval */
194 uint64_t zsp_size
; /* size in this interval */
195 uint64_t zsp_min
; /* configured min in this interval */
196 uint64_t zsp_max
; /* configured max in this interval */
197 int64_t zsp_importance
; /* configured importance in this interval */
199 uint_t zsp_scheds
; /* scheds of processes found in pset */
200 uint64_t zsp_cpu_shares
; /* total shares in this interval */
202 timestruc_t zsp_total_time
;
203 timestruc_t zsp_usage_kern
;
204 timestruc_t zsp_usage_zones
;
206 /* Individual zone usages of pset */
207 list_t zsp_usage_list
;
210 /* Summed kstat values from individual cpus in pset */
211 timestruc_t zsp_idle
;
212 timestruc_t zsp_intr
;
213 timestruc_t zsp_kern
;
214 timestruc_t zsp_user
;
218 /* Used to track an individual cpu's utilization as reported by kstats */
219 typedef struct zsd_cpu
{
220 processorid_t zsc_id
;
221 list_node_t zsc_next
;
223 psetid_t zsc_psetid_prev
;
224 zsd_pset_t
*zsc_pset
;
226 boolean_t zsc_found
; /* cpu online in this interval */
227 boolean_t zsc_onlined
; /* cpu onlined during this interval */
228 boolean_t zsc_offlined
; /* cpu offlined during this interval */
229 boolean_t zsc_active
; /* cpu online during this interval */
230 boolean_t zsc_allocated
; /* True if cpu has ever been found */
232 /* kstats this interval */
233 uint64_t zsc_nsec_idle
;
234 uint64_t zsc_nsec_intr
;
235 uint64_t zsc_nsec_kern
;
236 uint64_t zsc_nsec_user
;
238 /* kstats in most recent interval */
239 uint64_t zsc_nsec_idle_prev
;
240 uint64_t zsc_nsec_intr_prev
;
241 uint64_t zsc_nsec_kern_prev
;
242 uint64_t zsc_nsec_user_prev
;
244 /* Total kstat increases since zonestatd started reading kstats */
245 timestruc_t zsc_idle
;
246 timestruc_t zsc_intr
;
247 timestruc_t zsc_kern
;
248 timestruc_t zsc_user
;
252 /* Used to describe an individual zone and its utilization */
253 typedef struct zsd_zone
{
255 list_node_t zsz_next
;
256 char zsz_name
[ZS_ZONENAME_MAX
];
260 hrtime_t zsz_hrstart
;
262 char zsz_pool
[ZS_POOLNAME_MAX
];
263 char zsz_pset
[ZS_PSETNAME_MAX
];
264 int zsz_default_sched
;
265 /* These are deduced by inspecting processes */
269 boolean_t zsz_new
; /* zone booted during this interval */
270 boolean_t zsz_deleted
; /* halted during this interval */
271 boolean_t zsz_active
; /* running in this interval */
272 boolean_t zsz_empty
; /* no processes in this interval */
273 boolean_t zsz_gone
; /* not installed in this interval */
274 boolean_t zsz_found
; /* Running at end of this interval */
276 uint64_t zsz_cpu_shares
;
277 uint64_t zsz_cpu_cap
;
278 uint64_t zsz_ram_cap
;
279 uint64_t zsz_locked_cap
;
282 uint64_t zsz_cpus_online
;
283 timestruc_t zsz_cpu_usage
; /* cpu time of cpu cap */
284 timestruc_t zsz_cap_time
; /* cpu time of cpu cap */
285 timestruc_t zsz_share_time
; /* cpu time of share of cpu */
286 timestruc_t zsz_pset_time
; /* time of all psets zone is bound to */
288 uint64_t zsz_usage_ram
;
289 uint64_t zsz_usage_locked
;
290 uint64_t zsz_usage_vm
;
292 uint64_t zsz_processes_cap
;
293 uint64_t zsz_lwps_cap
;
294 uint64_t zsz_shm_cap
;
295 uint64_t zsz_shmids_cap
;
296 uint64_t zsz_semids_cap
;
297 uint64_t zsz_msgids_cap
;
298 uint64_t zsz_lofi_cap
;
300 uint64_t zsz_processes
;
311 * Used to track the cpu usage of individual processes.
313 * zonestatd sweeps /proc each interval and charges the cpu usage of processes
314 * to their zone. As processes exit, their extended accounting records are
315 * read and the difference of their total and known usage is charged to their
318 * If a process is never seen in /proc, the total usage on its extended
319 * accounting record will be charged to its zone.
321 typedef struct zsd_proc
{
322 list_node_t zspr_next
;
324 psetid_t zspr_psetid
;
325 zoneid_t zspr_zoneid
;
327 timestruc_t zspr_usage
;
330 /* Used to track the overall resource usage of the system */
331 typedef struct zsd_system
{
333 uint64_t zss_ram_total
;
334 uint64_t zss_ram_kern
;
335 uint64_t zss_ram_zones
;
337 uint64_t zss_locked_kern
;
338 uint64_t zss_locked_zones
;
340 uint64_t zss_vm_total
;
341 uint64_t zss_vm_kern
;
342 uint64_t zss_vm_zones
;
344 uint64_t zss_swap_total
;
345 uint64_t zss_swap_used
;
347 timestruc_t zss_idle
;
348 timestruc_t zss_intr
;
349 timestruc_t zss_kern
;
350 timestruc_t zss_user
;
352 timestruc_t zss_cpu_total_time
;
353 timestruc_t zss_cpu_usage_kern
;
354 timestruc_t zss_cpu_usage_zones
;
357 uint64_t zss_processes_max
;
358 uint64_t zss_lwps_max
;
359 uint64_t zss_shm_max
;
360 uint64_t zss_shmids_max
;
361 uint64_t zss_semids_max
;
362 uint64_t zss_msgids_max
;
363 uint64_t zss_lofi_max
;
365 uint64_t zss_processes
;
374 uint64_t zss_ncpus_online
;
379 * A dumping ground for various information and structures used to compute
382 * This structure is used to track the system while clients are connected.
383 * When the first client connects, a zsd_ctl is allocated and configured by
384 * zsd_open(). When all clients disconnect, the zsd_ctl is closed.
386 typedef struct zsd_ctl
{
387 kstat_ctl_t
*zsctl_kstat_ctl
;
389 /* To track extended accounting */
390 int zsctl_proc_fd
; /* Log currently being used */
391 ea_file_t zsctl_proc_eaf
;
392 struct stat zsctl_proc_stat
;
394 int zsctl_proc_fd_next
; /* Log file to use next */
395 ea_file_t zsctl_proc_eaf_next
;
396 struct stat zsctl_proc_stat_next
;
397 int zsctl_proc_open_next
;
399 /* pool configuration handle */
400 pool_conf_t
*zsctl_pool_conf
;
401 int zsctl_pool_status
;
402 int zsctl_pool_changed
;
404 /* The above usage tracking structures */
405 zsd_system_t
*zsctl_system
;
409 zsd_cpu_t
*zsctl_cpu_array
;
410 zsd_proc_t
*zsctl_proc_array
;
412 /* Various system info */
413 uint64_t zsctl_maxcpuid
;
414 uint64_t zsctl_maxproc
;
415 uint64_t zsctl_kern_bits
;
416 uint64_t zsctl_pagesize
;
418 /* Used to track time available under a cpu cap. */
419 uint64_t zsctl_hrtime
;
420 uint64_t zsctl_hrtime_prev
;
421 timestruc_t zsctl_hrtime_total
;
423 struct timeval zsctl_timeofday
;
425 /* Caches for arrays allocated for use by various system calls */
426 psetid_t
*zsctl_pset_cache
;
427 uint_t zsctl_pset_ncache
;
428 processorid_t
*zsctl_cpu_cache
;
429 uint_t zsctl_cpu_ncache
;
430 zoneid_t
*zsctl_zone_cache
;
431 uint_t zsctl_zone_ncache
;
432 struct swaptable
*zsctl_swap_cache
;
433 uint64_t zsctl_swap_cache_size
;
434 uint64_t zsctl_swap_cache_num
;
435 zsd_vmusage64_t
*zsctl_vmusage_cache
;
436 uint64_t zsctl_vmusage_cache_num
;
438 /* Info about procfs for scanning /proc */
439 struct dirent
*zsctl_procfs_dent
;
440 long zsctl_procfs_dent_size
;
441 pool_value_t
*zsctl_pool_vals
[3];
443 /* Counts on tracked entities */
446 uint_t zsctl_npset_usages
;
450 boolean_t g_open
; /* True if g_ctl is open */
451 int g_hasclient
; /* True if any clients are connected */
454 * The usage cache is updated by the stat_thread, and copied to clients by
455 * the zsd_stat_server. Mutex and cond are to synchronize between the
456 * stat_thread and the stat_server.
458 zs_usage_cache_t
*g_usage_cache
;
459 mutex_t g_usage_cache_lock
;
460 cond_t g_usage_cache_kick
;
461 uint_t g_usage_cache_kickers
;
462 cond_t g_usage_cache_wait
;
463 char *g_usage_cache_buf
;
464 uint_t g_usage_cache_bufsz
;
467 /* fds of door servers */
472 * Starting and current time. Used to throttle memory calculation, and to
473 * mark new zones and psets with their boot and creation time.
/*
 * Emit a non-fatal warning on stderr.
 *
 * The message is prefixed with the localized "zonestat: Warning: "
 * banner and terminated with a newline.  fmt and the trailing
 * arguments follow printf(3C) conventions.
 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Warning: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
}
/*
 * Emit a fatal error on stderr and terminate the daemon.
 *
 * The message is prefixed with the localized "zonestat: Error: "
 * banner and terminated with a newline.  fmt and the trailing
 * arguments follow printf(3C) conventions.
 *
 * NOTE(review): the tail of this function is not visible in the
 * extracted source; the va_end()/exit(1) ending matches the upstream
 * illumos version of this daemon -- confirm against the original file.
 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Error: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
	exit(1);
}
515 /* Turns on extended accounting if not configured externally */
517 zsd_enable_cpu_stats()
519 char *path
= ZONESTAT_EXACCT_FILE
;
520 char oldfile
[MAXPATHLEN
];
521 int ret
, state
= AC_ON
;
525 * Start a new accounting file if accounting not configured
529 res
[0].ar_id
= AC_PROC_PID
;
530 res
[0].ar_state
= AC_ON
;
531 res
[1].ar_id
= AC_PROC_ANCPID
;
532 res
[1].ar_state
= AC_ON
;
533 res
[2].ar_id
= AC_PROC_CPU
;
534 res
[2].ar_state
= AC_ON
;
535 res
[3].ar_id
= AC_PROC_TIME
;
536 res
[3].ar_state
= AC_ON
;
537 res
[4].ar_id
= AC_PROC_ZONENAME
;
538 res
[4].ar_state
= AC_ON
;
539 res
[5].ar_id
= AC_NONE
;
540 res
[5].ar_state
= AC_ON
;
541 if (acctctl(AC_PROC
| AC_RES_SET
, res
, sizeof (res
)) != 0) {
542 zsd_warn(gettext("Unable to set accounting resources"));
545 /* Only set accounting file if none is configured */
546 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
550 if (acctctl(AC_PROC
| AC_FILE_SET
, path
, strlen(path
) + 1)
552 zsd_warn(gettext("Unable to set accounting file"));
556 if (acctctl(AC_PROC
| AC_STATE_SET
, &state
, sizeof (state
)) == -1) {
557 zsd_warn(gettext("Unable to enable accounting"));
563 /* Turns off extended accounting if not configured externally */
565 zsd_disable_cpu_stats()
567 char *path
= ZONESTAT_EXACCT_FILE
;
568 int ret
, state
= AC_OFF
;
570 char oldfile
[MAXPATHLEN
];
572 /* If accounting file is externally configured, leave it alone */
573 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
574 if (ret
== 0 && strcmp(oldfile
, path
) != 0)
577 res
[0].ar_id
= AC_PROC_PID
;
578 res
[0].ar_state
= AC_OFF
;
579 res
[1].ar_id
= AC_PROC_ANCPID
;
580 res
[1].ar_state
= AC_OFF
;
581 res
[2].ar_id
= AC_PROC_CPU
;
582 res
[2].ar_state
= AC_OFF
;
583 res
[3].ar_id
= AC_PROC_TIME
;
584 res
[3].ar_state
= AC_OFF
;
585 res
[4].ar_id
= AC_PROC_ZONENAME
;
586 res
[4].ar_state
= AC_OFF
;
587 res
[5].ar_id
= AC_NONE
;
588 res
[5].ar_state
= AC_OFF
;
589 if (acctctl(AC_PROC
| AC_RES_SET
, res
, sizeof (res
)) != 0) {
590 zsd_warn(gettext("Unable to clear accounting resources"));
593 if (acctctl(AC_PROC
| AC_FILE_SET
, NULL
, 0) == -1) {
594 zsd_warn(gettext("Unable to clear accounting file"));
597 if (acctctl(AC_PROC
| AC_STATE_SET
, &state
, sizeof (state
)) == -1) {
598 zsd_warn(gettext("Unable to diable accounting"));
607 * If not configured externally, deletes the current extended accounting file
608 * and starts a new one.
610 * Since the stat_thread holds an open handle to the accounting file, it will
611 * read all remaining entries from the old file before switching to
615 zsd_roll_exacct(void)
618 char *path
= ZONESTAT_EXACCT_FILE
;
619 char oldfile
[MAXPATHLEN
];
621 /* If accounting file is externally configured, leave it alone */
622 ret
= acctctl(AC_PROC
| AC_FILE_GET
, oldfile
, sizeof (oldfile
));
623 if (ret
== 0 && strcmp(oldfile
, path
) != 0)
626 if (unlink(path
) != 0)
627 /* Roll it next time */
630 if (acctctl(AC_PROC
| AC_FILE_SET
, path
, strlen(path
) + 1) == -1) {
631 zsd_warn(gettext("Unable to set accounting file"));
637 /* Contract stuff for zone_enter() */
644 fd
= open(CTFS_ROOT
"/process/template", O_RDWR
);
649 * For now, zoneadmd doesn't do anything with the contract.
650 * Deliver no events, don't inherit, and allow it to be orphaned.
652 err
|= ct_tmpl_set_critical(fd
, 0);
653 err
|= ct_tmpl_set_informative(fd
, 0);
654 err
|= ct_pr_tmpl_set_fatal(fd
, CT_PR_EV_HWERR
);
655 err
|= ct_pr_tmpl_set_param(fd
, CT_PR_PGRPONLY
| CT_PR_REGENT
);
656 if (err
|| ct_tmpl_activate(fd
)) {
665 * Contract stuff for zone_enter()
668 contract_latest(ctid_t
*id
)
674 if ((cfd
= open(CTFS_ROOT
"/process/latest", O_RDONLY
)) == -1)
677 if ((r
= ct_status_read(cfd
, CTD_COMMON
, &st
)) != 0) {
682 result
= ct_status_get_id(st
);
691 close_on_exec(int fd
)
693 int flags
= fcntl(fd
, F_GETFD
, 0);
694 if ((flags
!= -1) && (fcntl(fd
, F_SETFD
, flags
| FD_CLOEXEC
) != -1))
700 contract_open(ctid_t ctid
, const char *type
, const char *file
, int oflag
)
708 n
= snprintf(path
, PATH_MAX
, CTFS_ROOT
"/%s/%ld/%s", type
, ctid
, file
);
709 if (n
>= sizeof (path
)) {
710 errno
= ENAMETOOLONG
;
714 fd
= open(path
, oflag
);
716 if (close_on_exec(fd
) == -1) {
727 contract_abandon_id(ctid_t ctid
)
731 fd
= contract_open(ctid
, "all", "ctl", O_WRONLY
);
735 err
= ct_ctl_abandon(fd
);
741 * Attach the zsd_server to a zone. Called for each zone when zonestatd
742 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
744 * Zone_enter is used to avoid reaching into zone to fattach door.
747 zsd_fattach_zone(zoneid_t zid
, int door
, boolean_t detach_only
)
749 char *path
= ZS_DOOR_PATH
;
750 int fd
, pid
, stat
, tmpl_fd
;
753 if ((tmpl_fd
= init_template()) == -1) {
754 zsd_warn("Unable to init template");
760 (void) ct_tmpl_clear(tmpl_fd
);
762 "Unable to fork to add zonestat to zoneid %d\n"), zid
);
767 (void) ct_tmpl_clear(tmpl_fd
);
768 (void) close(tmpl_fd
);
769 if (zid
!= 0 && zone_enter(zid
) != 0) {
770 if (errno
== EINVAL
) {
775 (void) fdetach(path
);
779 fd
= open(path
, O_CREAT
|O_RDWR
, 0644);
782 if (fattach(door
, path
) != 0)
786 if (contract_latest(&ct
) == -1)
788 (void) ct_tmpl_clear(tmpl_fd
);
789 (void) close(tmpl_fd
);
790 (void) contract_abandon_id(ct
);
791 while (waitpid(pid
, &stat
, 0) != pid
)
793 if (WIFEXITED(stat
) && WEXITSTATUS(stat
) == 0)
796 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid
);
798 if (WEXITSTATUS(stat
) == 1)
799 zsd_warn(gettext("Cannot entering zone"));
800 else if (WEXITSTATUS(stat
) == 2)
801 zsd_warn(gettext("Unable to create door file: %s"), path
);
802 else if (WEXITSTATUS(stat
) == 3)
803 zsd_warn(gettext("Unable to fattach file: %s"), path
);
805 zsd_warn(gettext("Internal error entering zone: %d"), zid
);
809 * Zone lookup and allocation functions to manage list of currently running
813 zsd_lookup_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
817 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
818 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
819 if (strcmp(zone
->zsz_name
, zonename
) == 0) {
821 zone
->zsz_id
= zoneid
;
829 zsd_lookup_zone_byid(zsd_ctl_t
*ctl
, zoneid_t zoneid
)
833 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
834 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
835 if (zone
->zsz_id
== zoneid
)
842 zsd_allocate_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
846 if ((zone
= (zsd_zone_t
*)calloc(1, sizeof (zsd_zone_t
))) == NULL
)
849 (void) strlcpy(zone
->zsz_name
, zonename
, sizeof (zone
->zsz_name
));
850 zone
->zsz_id
= zoneid
;
851 zone
->zsz_found
= B_FALSE
;
854 * Allocate as deleted so if not found in first pass, zone is deleted
855 * from list. This can happen if zone is returned by zone_list, but
856 * exits before first attempt to fetch zone details.
858 zone
->zsz_start
= g_now
;
859 zone
->zsz_hrstart
= g_hrnow
;
860 zone
->zsz_deleted
= B_TRUE
;
862 zone
->zsz_cpu_shares
= ZS_LIMIT_NONE
;
863 zone
->zsz_cpu_cap
= ZS_LIMIT_NONE
;
864 zone
->zsz_ram_cap
= ZS_LIMIT_NONE
;
865 zone
->zsz_locked_cap
= ZS_LIMIT_NONE
;
866 zone
->zsz_vm_cap
= ZS_LIMIT_NONE
;
868 zone
->zsz_processes_cap
= ZS_LIMIT_NONE
;
869 zone
->zsz_lwps_cap
= ZS_LIMIT_NONE
;
870 zone
->zsz_shm_cap
= ZS_LIMIT_NONE
;
871 zone
->zsz_shmids_cap
= ZS_LIMIT_NONE
;
872 zone
->zsz_semids_cap
= ZS_LIMIT_NONE
;
873 zone
->zsz_msgids_cap
= ZS_LIMIT_NONE
;
874 zone
->zsz_lofi_cap
= ZS_LIMIT_NONE
;
882 zsd_lookup_insert_zone(zsd_ctl_t
*ctl
, char *zonename
, zoneid_t zoneid
)
884 zsd_zone_t
*zone
, *tmp
;
886 if ((zone
= zsd_lookup_zone(ctl
, zonename
, zoneid
)) != NULL
)
889 if ((zone
= zsd_allocate_zone(ctl
, zonename
, zoneid
)) == NULL
)
892 /* Insert sorted by zonename */
893 tmp
= list_head(&ctl
->zsctl_zones
);
894 while (tmp
!= NULL
&& strcmp(zonename
, tmp
->zsz_name
) > 0)
895 tmp
= list_next(&ctl
->zsctl_zones
, tmp
);
897 list_insert_before(&ctl
->zsctl_zones
, tmp
, zone
);
902 * Mark all zones as not existing. As zones are found, they will
903 * be marked as existing. If a zone is not found, then it must have
907 zsd_mark_zones_start(zsd_ctl_t
*ctl
)
912 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
913 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
914 zone
->zsz_found
= B_FALSE
;
919 * Mark each zone as not using pset. If processes are found using the
920 * pset, the zone will remain bound to the pset. If none of a zones
921 * processes are bound to the pset, the zone's usage of the pset will
926 zsd_mark_pset_usage_start(zsd_pset_t
*pset
)
928 zsd_pset_usage_t
*usage
;
930 for (usage
= list_head(&pset
->zsp_usage_list
);
932 usage
= list_next(&pset
->zsp_usage_list
, usage
)) {
933 usage
->zsu_found
= B_FALSE
;
934 usage
->zsu_empty
= B_TRUE
;
939 * Mark each pset as not existing. If a pset is found, it will be marked
940 * as existing. If a pset is not found, it wil be deleted.
943 zsd_mark_psets_start(zsd_ctl_t
*ctl
)
947 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
948 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
949 pset
->zsp_found
= B_FALSE
;
950 zsd_mark_pset_usage_start(pset
);
955 * A pset was found. Update its information
958 zsd_mark_pset_found(zsd_pset_t
*pset
, uint_t type
, uint64_t online
,
959 uint64_t size
, uint64_t min
, uint64_t max
, int64_t importance
)
961 pset
->zsp_empty
= B_TRUE
;
962 pset
->zsp_deleted
= B_FALSE
;
964 assert(pset
->zsp_found
== B_FALSE
);
966 /* update pset flags */
967 if (pset
->zsp_active
== B_FALSE
)
968 /* pset not seen on previous interval. It is new. */
969 pset
->zsp_new
= B_TRUE
;
971 pset
->zsp_new
= B_FALSE
;
973 pset
->zsp_found
= B_TRUE
;
974 pset
->zsp_cputype
= type
;
975 pset
->zsp_online
= online
;
976 pset
->zsp_size
= size
;
979 pset
->zsp_importance
= importance
;
980 pset
->zsp_cpu_shares
= 0;
981 pset
->zsp_scheds
= 0;
982 pset
->zsp_active
= B_TRUE
;
986 * A zone's process was found using a pset. Charge the process to the pset and
987 * the per-zone data for the pset.
990 zsd_mark_pset_usage_found(zsd_pset_usage_t
*usage
, uint_t sched
)
992 zsd_zone_t
*zone
= usage
->zsu_zone
;
993 zsd_pset_t
*pset
= usage
->zsu_pset
;
995 /* Nothing to do if already found */
996 if (usage
->zsu_found
== B_TRUE
)
999 usage
->zsu_found
= B_TRUE
;
1000 usage
->zsu_empty
= B_FALSE
;
1002 usage
->zsu_deleted
= B_FALSE
;
1003 /* update usage flags */
1004 if (usage
->zsu_active
== B_FALSE
)
1005 usage
->zsu_new
= B_TRUE
;
1007 usage
->zsu_new
= B_FALSE
;
1009 usage
->zsu_scheds
= 0;
1010 usage
->zsu_cpu_shares
= ZS_LIMIT_NONE
;
1011 usage
->zsu_active
= B_TRUE
;
1012 pset
->zsp_empty
= B_FALSE
;
1013 zone
->zsz_empty
= B_FALSE
;
1016 /* Detect zone's pset id, and if it is bound to multiple psets */
1017 if (zone
->zsz_psetid
== ZS_PSET_ERROR
)
1018 zone
->zsz_psetid
= pset
->zsp_id
;
1019 else if (zone
->zsz_psetid
!= pset
->zsp_id
)
1020 zone
->zsz_psetid
= ZS_PSET_MULTI
;
1022 usage
->zsu_scheds
|= sched
;
1023 pset
->zsp_scheds
|= sched
;
1024 zone
->zsz_scheds
|= sched
;
1026 /* Record if FSS is co-habitating with conflicting scheduler */
1027 if ((pset
->zsp_scheds
& ZS_SCHED_FSS
) &&
1028 usage
->zsu_scheds
& (
1029 ZS_SCHED_TS
| ZS_SCHED_IA
| ZS_SCHED_FX
)) {
1030 usage
->zsu_scheds
|= ZS_SCHED_CONFLICT
;
1032 pset
->zsp_scheds
|= ZS_SCHED_CONFLICT
;
1037 /* Add cpu time for a process to a pset, zone, and system totals */
1039 zsd_add_usage(zsd_ctl_t
*ctl
, zsd_pset_usage_t
*usage
, timestruc_t
*delta
)
1041 zsd_system_t
*system
= ctl
->zsctl_system
;
1042 zsd_zone_t
*zone
= usage
->zsu_zone
;
1043 zsd_pset_t
*pset
= usage
->zsu_pset
;
1045 TIMESTRUC_ADD_TIMESTRUC(usage
->zsu_cpu_usage
, *delta
);
1046 TIMESTRUC_ADD_TIMESTRUC(pset
->zsp_usage_zones
, *delta
);
1047 TIMESTRUC_ADD_TIMESTRUC(zone
->zsz_cpu_usage
, *delta
);
1048 TIMESTRUC_ADD_TIMESTRUC(system
->zss_cpu_usage_zones
, *delta
);
1051 /* Determine which processor sets have been deleted */
1053 zsd_mark_psets_end(zsd_ctl_t
*ctl
)
1055 zsd_pset_t
*pset
, *tmp
;
1058 * Mark pset as not exists, and deleted if it existed
1059 * previous interval.
1061 pset
= list_head(&ctl
->zsctl_psets
);
1062 while (pset
!= NULL
) {
1063 if (pset
->zsp_found
== B_FALSE
) {
1064 pset
->zsp_empty
= B_TRUE
;
1065 if (pset
->zsp_deleted
== B_TRUE
) {
1067 pset
= list_next(&ctl
->zsctl_psets
, pset
);
1068 list_remove(&ctl
->zsctl_psets
, tmp
);
1070 ctl
->zsctl_npsets
--;
1073 /* Pset vanished during this interval */
1074 pset
->zsp_new
= B_FALSE
;
1075 pset
->zsp_deleted
= B_TRUE
;
1076 pset
->zsp_active
= B_TRUE
;
1079 pset
= list_next(&ctl
->zsctl_psets
, pset
);
1083 /* Determine which zones are no longer bound to processor sets */
1085 zsd_mark_pset_usages_end(zsd_ctl_t
*ctl
)
1089 zsd_pset_usage_t
*usage
, *tmp
;
1092 * Mark pset as not exists, and deleted if it existed previous
1095 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1096 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1097 usage
= list_head(&pset
->zsp_usage_list
);
1098 while (usage
!= NULL
) {
1100 * Mark pset as not exists, and deleted if it existed
1101 * previous interval.
1103 if (usage
->zsu_found
== B_FALSE
||
1104 usage
->zsu_zone
->zsz_deleted
== B_TRUE
||
1105 usage
->zsu_pset
->zsp_deleted
== B_TRUE
) {
1107 usage
= list_next(&pset
->zsp_usage_list
,
1109 list_remove(&pset
->zsp_usage_list
, tmp
);
1112 ctl
->zsctl_npset_usages
--;
1115 usage
->zsu_new
= B_FALSE
;
1116 usage
->zsu_deleted
= B_TRUE
;
1117 usage
->zsu_active
= B_TRUE
;
1119 /* Add cpu shares for usages that are in FSS */
1120 zone
= usage
->zsu_zone
;
1121 if (usage
->zsu_scheds
& ZS_SCHED_FSS
&&
1122 zone
->zsz_cpu_shares
!= ZS_SHARES_UNLIMITED
&&
1123 zone
->zsz_cpu_shares
!= 0) {
1124 zone
= usage
->zsu_zone
;
1125 usage
->zsu_cpu_shares
= zone
->zsz_cpu_shares
;
1126 pset
->zsp_cpu_shares
+= zone
->zsz_cpu_shares
;
1128 usage
= list_next(&pset
->zsp_usage_list
,
1134 /* A zone has been found. Update its information */
1136 zsd_mark_zone_found(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
, uint64_t cpu_shares
,
1137 uint64_t cpu_cap
, uint64_t ram_cap
, uint64_t locked_cap
,
1138 uint64_t vm_cap
, uint64_t processes_cap
, uint64_t processes
,
1139 uint64_t lwps_cap
, uint64_t lwps
, uint64_t shm_cap
, uint64_t shm
,
1140 uint64_t shmids_cap
, uint64_t shmids
, uint64_t semids_cap
,
1141 uint64_t semids
, uint64_t msgids_cap
, uint64_t msgids
, uint64_t lofi_cap
,
1142 uint64_t lofi
, char *poolname
, char *psetname
, uint_t sched
, uint_t cputype
,
1145 zsd_system_t
*sys
= ctl
->zsctl_system
;
1147 assert(zone
->zsz_found
== B_FALSE
);
1150 * Mark zone as exists, and new if it did not exist in previous
1153 zone
->zsz_found
= B_TRUE
;
1154 zone
->zsz_empty
= B_TRUE
;
1155 zone
->zsz_deleted
= B_FALSE
;
1158 * Zone is new. Assume zone's properties are the same over entire
1161 if (zone
->zsz_active
== B_FALSE
)
1162 zone
->zsz_new
= B_TRUE
;
1164 zone
->zsz_new
= B_FALSE
;
1166 (void) strlcpy(zone
->zsz_pool
, poolname
, sizeof (zone
->zsz_pool
));
1167 (void) strlcpy(zone
->zsz_pset
, psetname
, sizeof (zone
->zsz_pset
));
1168 zone
->zsz_default_sched
= sched
;
1170 /* Schedulers updated later as processes are found */
1171 zone
->zsz_scheds
= 0;
1173 /* Cpus updated later as psets bound are identified */
1174 zone
->zsz_cpus_online
= 0;
1176 zone
->zsz_cputype
= cputype
;
1177 zone
->zsz_iptype
= iptype
;
1178 zone
->zsz_psetid
= ZS_PSET_ERROR
;
1179 zone
->zsz_cpu_cap
= cpu_cap
;
1180 zone
->zsz_cpu_shares
= cpu_shares
;
1181 zone
->zsz_ram_cap
= ram_cap
;
1182 zone
->zsz_locked_cap
= locked_cap
;
1183 zone
->zsz_vm_cap
= vm_cap
;
1184 zone
->zsz_processes_cap
= processes_cap
;
1185 zone
->zsz_processes
= processes
;
1186 zone
->zsz_lwps_cap
= lwps_cap
;
1187 zone
->zsz_lwps
= lwps
;
1188 zone
->zsz_shm_cap
= shm_cap
;
1189 zone
->zsz_shm
= shm
;
1190 zone
->zsz_shmids_cap
= shmids_cap
;
1191 zone
->zsz_shmids
= shmids
;
1192 zone
->zsz_semids_cap
= semids_cap
;
1193 zone
->zsz_semids
= semids
;
1194 zone
->zsz_msgids_cap
= msgids_cap
;
1195 zone
->zsz_msgids
= msgids
;
1196 zone
->zsz_lofi_cap
= lofi_cap
;
1197 zone
->zsz_lofi
= lofi
;
1199 sys
->zss_processes
+= processes
;
1200 sys
->zss_lwps
+= lwps
;
1201 sys
->zss_shm
+= shm
;
1202 sys
->zss_shmids
+= shmids
;
1203 sys
->zss_semids
+= semids
;
1204 sys
->zss_msgids
+= msgids
;
1205 sys
->zss_lofi
+= lofi
;
1206 zone
->zsz_active
= B_TRUE
;
1210 /* Determine which zones have halted */
1212 zsd_mark_zones_end(zsd_ctl_t
*ctl
)
1214 zsd_zone_t
*zone
, *tmp
;
1217 * Mark zone as not existing, or delete if it did not exist in
1218 * previous interval.
1220 zone
= list_head(&ctl
->zsctl_zones
);
1221 while (zone
!= NULL
) {
1222 if (zone
->zsz_found
== B_FALSE
) {
1223 zone
->zsz_empty
= B_TRUE
;
1224 if (zone
->zsz_deleted
== B_TRUE
) {
1226 * Zone deleted in prior interval,
1227 * so it no longer exists.
1230 zone
= list_next(&ctl
->zsctl_zones
, zone
);
1231 list_remove(&ctl
->zsctl_zones
, tmp
);
1233 ctl
->zsctl_nzones
--;
1236 zone
->zsz_new
= B_FALSE
;
1237 zone
->zsz_deleted
= B_TRUE
;
1238 zone
->zsz_active
= B_TRUE
;
1241 zone
= list_next(&ctl
->zsctl_zones
, zone
);
1246 * Mark cpus as not existing. If a cpu is found, it will be updated. If
1247 * a cpu is not found, then it must have gone offline, so it will be
1250 * The kstat tracking data is rolled so that the usage since the previous
1251 * interval can be determined.
1254 zsd_mark_cpus_start(zsd_ctl_t
*ctl
, boolean_t roll
)
1259 * Mark all cpus as not existing. As cpus are found, they will
1260 * be marked as existing.
1262 for (cpu
= list_head(&ctl
->zsctl_cpus
); cpu
!= NULL
;
1263 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
)) {
1264 cpu
->zsc_found
= B_FALSE
;
1265 if (cpu
->zsc_active
== B_TRUE
&& roll
) {
1266 cpu
->zsc_psetid_prev
= cpu
->zsc_psetid
;
1267 cpu
->zsc_nsec_idle_prev
= cpu
->zsc_nsec_idle
;
1268 cpu
->zsc_nsec_intr_prev
= cpu
->zsc_nsec_intr
;
1269 cpu
->zsc_nsec_kern_prev
= cpu
->zsc_nsec_kern
;
1270 cpu
->zsc_nsec_user_prev
= cpu
->zsc_nsec_user
;
1276 * An array the size of the maximum number of cpus is kept. Within this array
1277 * a list of the online cpus is maintained.
1280 zsd_lookup_insert_cpu(zsd_ctl_t
*ctl
, processorid_t cpuid
)
1284 assert(cpuid
< ctl
->zsctl_maxcpuid
);
1285 cpu
= &(ctl
->zsctl_cpu_array
[cpuid
]);
1286 assert(cpuid
== cpu
->zsc_id
);
1288 if (cpu
->zsc_allocated
== B_FALSE
) {
1289 cpu
->zsc_allocated
= B_TRUE
;
1290 list_insert_tail(&ctl
->zsctl_cpus
, cpu
);
1295 /* A cpu has been found. Update its information */
1297 zsd_mark_cpu_found(zsd_cpu_t
*cpu
, zsd_pset_t
*pset
, psetid_t psetid
)
1300 * legacy processor sets, the cpu may move while zonestatd is
1301 * inspecting, causing it to be found twice. In this case, just
1302 * leave cpu in the first processor set in which it was found.
1304 if (cpu
->zsc_found
== B_TRUE
)
1307 /* Mark cpu as online */
1308 cpu
->zsc_found
= B_TRUE
;
1309 cpu
->zsc_offlined
= B_FALSE
;
1310 cpu
->zsc_pset
= pset
;
1312 * cpu is newly online.
1314 if (cpu
->zsc_active
== B_FALSE
) {
1316 * Cpu is newly online.
1318 cpu
->zsc_onlined
= B_TRUE
;
1319 cpu
->zsc_psetid
= psetid
;
1320 cpu
->zsc_psetid_prev
= psetid
;
1323 * cpu online during previous interval. Save properties at
1326 cpu
->zsc_onlined
= B_FALSE
;
1327 cpu
->zsc_psetid
= psetid
;
1330 cpu
->zsc_active
= B_TRUE
;
1333 /* Remove all offlined cpus from the list of tracked cpus */
1335 zsd_mark_cpus_end(zsd_ctl_t
*ctl
)
1337 zsd_cpu_t
*cpu
, *tmp
;
1340 /* Mark cpu as online or offline */
1341 cpu
= list_head(&ctl
->zsctl_cpus
);
1342 while (cpu
!= NULL
) {
1343 if (cpu
->zsc_found
== B_FALSE
) {
1344 if (cpu
->zsc_offlined
== B_TRUE
) {
1346 * cpu offlined in prior interval. It is gone.
1349 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
);
1350 list_remove(&ctl
->zsctl_cpus
, tmp
);
1351 /* Clear structure for future use */
1353 bzero(tmp
, sizeof (zsd_cpu_t
));
1355 tmp
->zsc_allocated
= B_FALSE
;
1356 tmp
->zsc_psetid
= ZS_PSET_ERROR
;
1357 tmp
->zsc_psetid_prev
= ZS_PSET_ERROR
;
1361 * cpu online at start of interval. Treat
1362 * as still online, since it was online for
1363 * some portion of the interval.
1365 cpu
->zsc_offlined
= B_TRUE
;
1366 cpu
->zsc_onlined
= B_FALSE
;
1367 cpu
->zsc_active
= B_TRUE
;
1368 cpu
->zsc_psetid
= cpu
->zsc_psetid_prev
;
1369 cpu
->zsc_pset
= NULL
;
1372 cpu
= list_next(&ctl
->zsctl_cpus
, cpu
);
1376 /* Some utility functions for managing the list of processor sets */
1378 zsd_lookup_pset_byid(zsd_ctl_t
*ctl
, psetid_t psetid
)
1382 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1383 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1384 if (pset
->zsp_id
== psetid
)
1391 zsd_lookup_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1395 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
1396 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
1397 if (strcmp(pset
->zsp_name
, psetname
) == 0) {
1399 pset
->zsp_id
= psetid
;
1407 zsd_allocate_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1411 if ((pset
= (zsd_pset_t
*)calloc(1, sizeof (zsd_pset_t
))) == NULL
)
1414 (void) strlcpy(pset
->zsp_name
, psetname
, sizeof (pset
->zsp_name
));
1415 pset
->zsp_id
= psetid
;
1416 pset
->zsp_found
= B_FALSE
;
1418 * Allocate as deleted so if not found in first pass, pset is deleted
1419 * from list. This can happen if pset is returned by pset_list, but
1420 * is destroyed before first attempt to fetch pset details.
1422 list_create(&pset
->zsp_usage_list
, sizeof (zsd_pset_usage_t
),
1423 offsetof(zsd_pset_usage_t
, zsu_next
));
1425 pset
->zsp_hrstart
= g_hrnow
;
1426 pset
->zsp_deleted
= B_TRUE
;
1427 pset
->zsp_empty
= B_TRUE
;
1428 ctl
->zsctl_npsets
++;
1434 zsd_lookup_insert_pset(zsd_ctl_t
*ctl
, char *psetname
, psetid_t psetid
)
1436 zsd_pset_t
*pset
, *tmp
;
1438 if ((pset
= zsd_lookup_pset(ctl
, psetname
, psetid
)) != NULL
)
1441 if ((pset
= zsd_allocate_pset(ctl
, psetname
, psetid
)) == NULL
)
1444 /* Insert sorted by psetname */
1445 tmp
= list_head(&ctl
->zsctl_psets
);
1446 while (tmp
!= NULL
&& strcmp(psetname
, tmp
->zsp_name
) > 0)
1447 tmp
= list_next(&ctl
->zsctl_psets
, tmp
);
1449 list_insert_before(&ctl
->zsctl_psets
, tmp
, pset
);
1453 /* Some utility functions for managing the list of zones using each pset */
1454 static zsd_pset_usage_t
*
1455 zsd_lookup_usage(zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1457 zsd_pset_usage_t
*usage
;
1459 for (usage
= list_head(&pset
->zsp_usage_list
); usage
!= NULL
;
1460 usage
= list_next(&pset
->zsp_usage_list
, usage
))
1461 if (usage
->zsu_zone
== zone
)
1467 static zsd_pset_usage_t
*
1468 zsd_allocate_pset_usage(zsd_ctl_t
*ctl
, zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1470 zsd_pset_usage_t
*usage
;
1472 if ((usage
= (zsd_pset_usage_t
*)calloc(1, sizeof (zsd_pset_usage_t
)))
1476 list_link_init(&usage
->zsu_next
);
1477 usage
->zsu_zone
= zone
;
1478 usage
->zsu_zoneid
= zone
->zsz_id
;
1479 usage
->zsu_pset
= pset
;
1480 usage
->zsu_found
= B_FALSE
;
1481 usage
->zsu_active
= B_FALSE
;
1482 usage
->zsu_new
= B_FALSE
;
1484 * Allocate as not deleted. If a process is found in a pset for
1485 * a zone, the usage will not be deleted until at least the next
1488 usage
->zsu_start
= g_now
;
1489 usage
->zsu_hrstart
= g_hrnow
;
1490 usage
->zsu_deleted
= B_FALSE
;
1491 usage
->zsu_empty
= B_TRUE
;
1492 usage
->zsu_scheds
= 0;
1493 usage
->zsu_cpu_shares
= ZS_LIMIT_NONE
;
1495 ctl
->zsctl_npset_usages
++;
1501 static zsd_pset_usage_t
*
1502 zsd_lookup_insert_usage(zsd_ctl_t
*ctl
, zsd_pset_t
*pset
, zsd_zone_t
*zone
)
1504 zsd_pset_usage_t
*usage
, *tmp
;
1506 if ((usage
= zsd_lookup_usage(pset
, zone
))
1510 if ((usage
= zsd_allocate_pset_usage(ctl
, pset
, zone
)) == NULL
)
1513 tmp
= list_head(&pset
->zsp_usage_list
);
1514 while (tmp
!= NULL
&& strcmp(zone
->zsz_name
, tmp
->zsu_zone
->zsz_name
)
1516 tmp
= list_next(&pset
->zsp_usage_list
, tmp
);
1518 list_insert_before(&pset
->zsp_usage_list
, tmp
, usage
);
1523 zsd_refresh_system(zsd_ctl_t
*ctl
)
1525 zsd_system_t
*system
= ctl
->zsctl_system
;
1527 /* Re-count these values each interval */
1528 system
->zss_processes
= 0;
1529 system
->zss_lwps
= 0;
1530 system
->zss_shm
= 0;
1531 system
->zss_shmids
= 0;
1532 system
->zss_semids
= 0;
1533 system
->zss_msgids
= 0;
1534 system
->zss_lofi
= 0;
1538 /* Reads each cpu's kstats, and adds the usage to the cpu's pset */
1540 zsd_update_cpu_stats(zsd_ctl_t
*ctl
, zsd_cpu_t
*cpu
)
1543 processorid_t cpuid
;
1544 zsd_pset_t
*pset_prev
;
1549 uint64_t idle
, intr
, kern
, user
;
1551 sys
= ctl
->zsctl_system
;
1552 pset
= cpu
->zsc_pset
;
1555 cpuid
= cpu
->zsc_id
;
1557 /* Get the cpu time totals for this cpu */
1558 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "cpu", cpuid
, "sys");
1562 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
1566 knp
= kstat_data_lookup(kstat
, "cpu_nsec_idle");
1567 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1570 idle
= knp
->value
.ui64
;
1572 knp
= kstat_data_lookup(kstat
, "cpu_nsec_kernel");
1573 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1576 kern
= knp
->value
.ui64
;
1578 knp
= kstat_data_lookup(kstat
, "cpu_nsec_user");
1579 if (knp
== NULL
|| knp
->data_type
!= KSTAT_DATA_UINT64
)
1582 user
= knp
->value
.ui64
;
1585 * Tracking intr time per cpu just exists for future enhancements.
1586 * The value is presently always zero.
1589 cpu
->zsc_nsec_idle
= idle
;
1590 cpu
->zsc_nsec_intr
= intr
;
1591 cpu
->zsc_nsec_kern
= kern
;
1592 cpu
->zsc_nsec_user
= user
;
1594 if (cpu
->zsc_onlined
== B_TRUE
) {
1596 * cpu is newly online. There is no reference value,
1597 * so just record its current stats for comparison
1598 * on next stat read.
1600 cpu
->zsc_nsec_idle_prev
= cpu
->zsc_nsec_idle
;
1601 cpu
->zsc_nsec_intr_prev
= cpu
->zsc_nsec_intr
;
1602 cpu
->zsc_nsec_kern_prev
= cpu
->zsc_nsec_kern
;
1603 cpu
->zsc_nsec_user_prev
= cpu
->zsc_nsec_user
;
1608 * Calculate relative time since previous refresh.
1609 * Paranoia. Don't let time go backwards.
1611 idle
= intr
= kern
= user
= 0;
1612 if (cpu
->zsc_nsec_idle
> cpu
->zsc_nsec_idle_prev
)
1613 idle
= cpu
->zsc_nsec_idle
- cpu
->zsc_nsec_idle_prev
;
1615 if (cpu
->zsc_nsec_intr
> cpu
->zsc_nsec_intr_prev
)
1616 intr
= cpu
->zsc_nsec_intr
- cpu
->zsc_nsec_intr_prev
;
1618 if (cpu
->zsc_nsec_kern
> cpu
->zsc_nsec_kern_prev
)
1619 kern
= cpu
->zsc_nsec_kern
- cpu
->zsc_nsec_kern_prev
;
1621 if (cpu
->zsc_nsec_user
> cpu
->zsc_nsec_user_prev
)
1622 user
= cpu
->zsc_nsec_user
- cpu
->zsc_nsec_user_prev
;
1624 /* Update totals for cpu usage */
1625 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_idle
, idle
);
1626 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_intr
, intr
);
1627 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_kern
, kern
);
1628 TIMESTRUC_ADD_NANOSEC(cpu
->zsc_user
, user
);
1631 * Add cpu's stats to its pset if it is known to be in
1632 * the pset since previous read.
1634 if (cpu
->zsc_psetid
== cpu
->zsc_psetid_prev
||
1635 cpu
->zsc_psetid_prev
== ZS_PSET_ERROR
||
1636 (pset_prev
= zsd_lookup_pset_byid(ctl
,
1637 cpu
->zsc_psetid_prev
)) == NULL
) {
1638 TIMESTRUC_ADD_NANOSEC(pset
->zsp_idle
, idle
);
1639 TIMESTRUC_ADD_NANOSEC(pset
->zsp_intr
, intr
);
1640 TIMESTRUC_ADD_NANOSEC(pset
->zsp_kern
, kern
);
1641 TIMESTRUC_ADD_NANOSEC(pset
->zsp_user
, user
);
1644 * Last pset was different than current pset.
1645 * Best guess is to split usage between the two.
1647 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_idle
, idle
/ 2);
1648 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_intr
, intr
/ 2);
1649 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_kern
, kern
/ 2);
1650 TIMESTRUC_ADD_NANOSEC(pset_prev
->zsp_user
, user
/ 2);
1652 TIMESTRUC_ADD_NANOSEC(pset
->zsp_idle
,
1653 (idle
/ 2) + (idle
% 2));
1654 TIMESTRUC_ADD_NANOSEC(pset
->zsp_intr
,
1655 (intr
/ 2) + (intr
% 2));
1656 TIMESTRUC_ADD_NANOSEC(pset
->zsp_kern
,
1657 (kern
/ 2) + (kern
% 2));
1658 TIMESTRUC_ADD_NANOSEC(pset
->zsp_user
,
1659 (user
/ 2) + (user
% 2));
1661 TIMESTRUC_ADD_NANOSEC(sys
->zss_idle
, idle
);
1662 TIMESTRUC_ADD_NANOSEC(sys
->zss_intr
, intr
);
1663 TIMESTRUC_ADD_NANOSEC(sys
->zss_kern
, kern
);
1664 TIMESTRUC_ADD_NANOSEC(sys
->zss_user
, user
);
1667 /* Determine the details of a processor set by pset_id */
1669 zsd_get_pool_pset(zsd_ctl_t
*ctl
, psetid_t psetid
, char *psetname
,
1670 size_t namelen
, uint_t
*cputype
, uint64_t *online
, uint64_t *size
,
1671 uint64_t *min
, uint64_t *max
, int64_t *importance
)
1675 pool_conf_t
*conf
= ctl
->zsctl_pool_conf
;
1676 pool_value_t
**vals
= ctl
->zsctl_pool_vals
;
1677 pool_resource_t
**res_list
= NULL
;
1678 pool_resource_t
*pset
;
1679 pool_component_t
**cpus
= NULL
;
1680 processorid_t
*cache
;
1686 if (ctl
->zsctl_pool_status
== POOL_DISABLED
) {
1689 * Inspect legacy psets
1692 old
= num
= ctl
->zsctl_cpu_ncache
;
1693 ret
= pset_info(psetid
, &type
, &num
,
1694 ctl
->zsctl_cpu_cache
);
1696 /* pset is gone. Tell caller to retry */
1704 if ((cache
= reallocarray(ctl
->zsctl_cpu_cache
, num
,
1705 sizeof (processorid_t
))) != NULL
) {
1706 ctl
->zsctl_cpu_ncache
= num
;
1707 ctl
->zsctl_cpu_cache
= cache
;
1710 * Could not allocate to get new cpu list.
1713 "Could not allocate for cpu list"));
1719 * Old school pset. Just make min and max equal
1722 if (psetid
== ZS_PSET_DEFAULT
) {
1723 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
1724 (void) strlcpy(psetname
, "pset_default", namelen
);
1726 *cputype
= ZS_CPUTYPE_PSRSET_PSET
;
1727 (void) snprintf(psetname
, namelen
,
1728 "SUNWlegacy_pset_%d", psetid
);
1732 * Just treat legacy pset as a simple pool pset
1743 /* Look up the pool pset using the pset id */
1745 pool_value_set_int64(vals
[1], psetid
);
1746 if (pool_value_set_name(vals
[1], "pset.sys_id")
1750 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
1752 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
1754 if ((res_list
= pool_query_resources(conf
, &num
, vals
)) == NULL
)
1761 if (pool_get_property(conf
, pool_resource_to_elem(conf
, pset
),
1762 "pset.name", vals
[0]) != POC_STRING
||
1763 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
1766 (void) strlcpy(psetname
, string
, namelen
);
1767 if (strncmp(psetname
, "SUNWtmp", strlen("SUNWtmp")) == 0)
1768 *cputype
= ZS_CPUTYPE_DEDICATED
;
1769 else if (psetid
== ZS_PSET_DEFAULT
)
1770 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
1772 *cputype
= ZS_CPUTYPE_POOL_PSET
;
1774 /* Get size, min, max, and importance */
1775 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1776 pset
), "pset.size", vals
[0]) == POC_UINT
&&
1777 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1782 /* Get size, min, max, and importance */
1783 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1784 pset
), "pset.min", vals
[0]) == POC_UINT
&&
1785 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1789 if (*min
>= ZSD_PSET_UNLIMITED
)
1790 *min
= ZS_LIMIT_NONE
;
1792 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1793 pset
), "pset.max", vals
[0]) == POC_UINT
&&
1794 pool_value_get_uint64(vals
[0], &uint64
) == PO_SUCCESS
)
1797 *max
= ZS_LIMIT_NONE
;
1799 if (*max
>= ZSD_PSET_UNLIMITED
)
1800 *max
= ZS_LIMIT_NONE
;
1802 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1803 pset
), "pset.importance", vals
[0]) == POC_INT
&&
1804 pool_value_get_int64(vals
[0], &int64
) == PO_SUCCESS
)
1805 *importance
= int64
;
1807 *importance
= (uint64_t)1;
1814 cpus
= pool_query_resource_components(conf
, pset
, &num
, NULL
);
1818 /* Make sure there is space for cpu id list */
1819 if (num
> ctl
->zsctl_cpu_ncache
) {
1820 if ((cache
= reallocarray(ctl
->zsctl_cpu_cache
, num
,
1821 sizeof (processorid_t
))) != NULL
) {
1822 ctl
->zsctl_cpu_ncache
= num
;
1823 ctl
->zsctl_cpu_cache
= cache
;
1826 * Could not allocate to get new cpu list.
1829 "Could not allocate for cpu list"));
1834 /* count the online cpus */
1835 for (i
= 0; i
< num
; i
++) {
1836 if (pool_get_property(conf
, pool_component_to_elem(
1837 conf
, cpus
[i
]), "cpu.status", vals
[0]) != POC_STRING
||
1838 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
1841 if (strcmp(string
, "on-line") != 0 &&
1842 strcmp(string
, "no-intr") != 0)
1845 if (pool_get_property(conf
, pool_component_to_elem(
1846 conf
, cpus
[i
]), "cpu.sys_id", vals
[0]) != POC_INT
||
1847 pool_value_get_int64(vals
[0], &int64
) != PO_SUCCESS
)
1851 ctl
->zsctl_cpu_cache
[i
] = (psetid_t
)int64
;
1860 * The pools operations should succeed since the conf is a consistent
1861 * snapshot. Tell caller there is no need to retry.
1868 * Update the current list of processor sets.
1869 * This also updates the list of online cpus, and each cpu's pset membership.
1872 zsd_refresh_psets(zsd_ctl_t
*ctl
)
1874 int i
, j
, ret
, state
;
1877 int64_t sys_id
, importance
;
1878 uint64_t online
, size
, min
, max
;
1879 zsd_system_t
*system
;
1883 char psetname
[ZS_PSETNAME_MAX
];
1884 processorid_t cpuid
;
1885 pool_value_t
*pv_save
= NULL
;
1886 pool_resource_t
**res_list
= NULL
;
1887 pool_resource_t
*res
;
1888 pool_value_t
**vals
;
1890 boolean_t roll_cpus
= B_TRUE
;
1892 /* Zero cpu counters to recount them */
1893 system
= ctl
->zsctl_system
;
1894 system
->zss_ncpus
= 0;
1895 system
->zss_ncpus_online
= 0;
1897 ret
= pool_get_status(&state
);
1898 if (ret
== 0 && state
== POOL_ENABLED
) {
1900 conf
= ctl
->zsctl_pool_conf
;
1901 vals
= ctl
->zsctl_pool_vals
;
1905 if (ctl
->zsctl_pool_status
== POOL_DISABLED
) {
1906 if (pool_conf_open(ctl
->zsctl_pool_conf
,
1907 pool_dynamic_location(), PO_RDONLY
) == 0) {
1908 ctl
->zsctl_pool_status
= POOL_ENABLED
;
1909 ctl
->zsctl_pool_changed
= POU_PSET
;
1912 ctl
->zsctl_pool_changed
= 0;
1913 ret
= pool_conf_update(ctl
->zsctl_pool_conf
,
1914 &(ctl
->zsctl_pool_changed
));
1916 /* Pools must have become disabled */
1917 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
1918 ctl
->zsctl_pool_status
= POOL_DISABLED
;
1919 if (pool_error() == POE_SYSTEM
&& errno
==
1924 "Unable to update pool configuration"));
1925 /* Not able to get pool info. Don't update. */
1929 /* Get the list of psets using libpool */
1930 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
1933 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
1935 if ((res_list
= pool_query_resources(conf
, &num
, vals
))
1939 if (num
> ctl
->zsctl_pset_ncache
) {
1940 if ((cache
= reallocarray(ctl
->zsctl_pset_cache
, num
,
1941 sizeof (psetid_t
))) == NULL
) {
1944 ctl
->zsctl_pset_ncache
= num
;
1945 ctl
->zsctl_pset_cache
= cache
;
1947 /* Save the pset id of each pset */
1948 for (i
= 0; i
< num
; i
++) {
1950 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
1951 res
), "pset.sys_id", vals
[0]) != POC_INT
||
1952 pool_value_get_int64(vals
[0], &sys_id
)
1955 ctl
->zsctl_pset_cache
[i
] = (int)sys_id
;
1960 if (ctl
->zsctl_pool_status
== POOL_ENABLED
) {
1961 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
1962 ctl
->zsctl_pool_status
= POOL_DISABLED
;
1964 /* Get the pset list using legacy psets */
1966 old
= num
= ctl
->zsctl_pset_ncache
;
1967 (void) pset_list(ctl
->zsctl_pset_cache
, &num
);
1968 if ((num
+ 1) <= old
) {
1971 if ((cache
= reallocarray(ctl
->zsctl_pset_cache
,
1972 num
+ 1, sizeof (psetid_t
))) != NULL
) {
1973 ctl
->zsctl_pset_ncache
= num
+ 1;
1974 ctl
->zsctl_pset_cache
= cache
;
1977 * Could not allocate to get new pset list.
1983 /* Add the default pset to list */
1984 ctl
->zsctl_pset_cache
[num
] = ctl
->zsctl_pset_cache
[0];
1985 ctl
->zsctl_pset_cache
[0] = ZS_PSET_DEFAULT
;
1989 zsd_mark_cpus_start(ctl
, roll_cpus
);
1990 zsd_mark_psets_start(ctl
);
1991 roll_cpus
= B_FALSE
;
1993 /* Refresh cpu membership of all psets */
1994 for (i
= 0; i
< num
; i
++) {
1996 /* Get pool pset information */
1997 sys_id
= ctl
->zsctl_pset_cache
[i
];
1998 if (zsd_get_pool_pset(ctl
, sys_id
, psetname
, sizeof (psetname
),
1999 &cputype
, &online
, &size
, &min
, &max
, &importance
)
2003 zsd_warn(gettext("Failed to get info for pset %d"),
2008 system
->zss_ncpus
+= size
;
2009 system
->zss_ncpus_online
+= online
;
2011 pset
= zsd_lookup_insert_pset(ctl
, psetname
,
2012 ctl
->zsctl_pset_cache
[i
]);
2014 /* update pset info */
2015 zsd_mark_pset_found(pset
, cputype
, online
, size
, min
,
2018 /* update each cpu in pset */
2019 for (j
= 0; j
< pset
->zsp_online
; j
++) {
2020 cpuid
= ctl
->zsctl_cpu_cache
[j
];
2021 cpu
= zsd_lookup_insert_cpu(ctl
, cpuid
);
2022 zsd_mark_cpu_found(cpu
, pset
, sys_id
);
2027 if (pv_save
!= NULL
)
2034 * Fetch the current pool and pset name for the given zone.
2037 zsd_get_zone_pool_pset(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
,
2038 char *pool
, int poollen
, char *pset
, int psetlen
, uint_t
*cputype
)
2041 pool_t
**pools
= NULL
;
2042 pool_resource_t
**res_list
= NULL
;
2043 char poolname
[ZS_POOLNAME_MAX
];
2044 char psetname
[ZS_PSETNAME_MAX
];
2045 pool_conf_t
*conf
= ctl
->zsctl_pool_conf
;
2046 pool_value_t
*pv_save
= NULL
;
2047 pool_value_t
**vals
= ctl
->zsctl_pool_vals
;
2053 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_POOLID
,
2054 &poolid
, sizeof (poolid
));
2063 /* Default values if lookup fails */
2064 (void) strlcpy(poolname
, "pool_default", sizeof (poolname
));
2065 (void) strlcpy(psetname
, "pset_default", sizeof (poolname
));
2066 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
2068 /* no dedicated cpu if pools are disabled */
2069 if (ctl
->zsctl_pool_status
== POOL_DISABLED
)
2072 /* Get the pool name using the id */
2073 pool_value_set_int64(vals
[0], poolid
);
2074 if (pool_value_set_name(vals
[0], "pool.sys_id") != PO_SUCCESS
)
2077 if ((pools
= pool_query_pools(conf
, &num
, vals
)) == NULL
)
2083 if (pool_get_property(conf
, pool_to_elem(conf
, pools
[0]),
2084 "pool.name", vals
[0]) != POC_STRING
||
2085 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
2087 (void) strlcpy(poolname
, (char *)string
, sizeof (poolname
));
2089 /* Get the name of the pset for the pool */
2090 if (pool_value_set_name(vals
[0], "type") != PO_SUCCESS
)
2093 if (pool_value_set_string(vals
[0], "pset") != PO_SUCCESS
)
2096 if ((res_list
= pool_query_pool_resources(conf
, pools
[0], &num
, vals
))
2103 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
2104 res_list
[0]), "pset.sys_id", vals
[0]) != POC_INT
||
2105 pool_value_get_int64(vals
[0], &int64
) != PO_SUCCESS
)
2108 if (int64
== ZS_PSET_DEFAULT
)
2109 *cputype
= ZS_CPUTYPE_DEFAULT_PSET
;
2111 if (pool_get_property(conf
, pool_resource_to_elem(conf
,
2112 res_list
[0]), "pset.name", vals
[0]) != POC_STRING
||
2113 pool_value_get_string(vals
[0], &string
) != PO_SUCCESS
)
2116 (void) strlcpy(psetname
, (char *)string
, sizeof (psetname
));
2118 if (strncmp(psetname
, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2119 *cputype
= ZS_CPUTYPE_DEDICATED
;
2120 if (strncmp(psetname
, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2121 *cputype
= ZS_CPUTYPE_PSRSET_PSET
;
2123 *cputype
= ZS_CPUTYPE_POOL_PSET
;
2127 if (pv_save
!= NULL
)
2133 (void) strlcpy(pool
, poolname
, poollen
);
2134 (void) strlcpy(pset
, psetname
, psetlen
);
2137 /* Convert scheduler names to ZS_* scheduler flags */
2139 zsd_schedname2int(char *clname
, int pri
)
2143 if (strcmp(clname
, "TS") == 0) {
2144 sched
= ZS_SCHED_TS
;
2145 } else if (strcmp(clname
, "IA") == 0) {
2146 sched
= ZS_SCHED_IA
;
2147 } else if (strcmp(clname
, "FX") == 0) {
2149 sched
= ZS_SCHED_FX_60
;
2151 sched
= ZS_SCHED_FX
;
2153 } else if (strcmp(clname
, "RT") == 0) {
2154 sched
= ZS_SCHED_RT
;
2156 } else if (strcmp(clname
, "FSS") == 0) {
2157 sched
= ZS_SCHED_FSS
;
2163 zsd_get_zone_rctl_limit(char *name
)
2167 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
2168 if (getrctl(name
, NULL
, rblk
, RCTL_FIRST
)
2170 return (ZS_LIMIT_NONE
);
2172 return (rctlblk_get_value(rblk
));
2176 zsd_get_zone_rctl_usage(char *name
)
2180 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
2181 if (getrctl(name
, NULL
, rblk
, RCTL_USAGE
)
2185 return (rctlblk_get_value(rblk
));
2188 #define ZSD_NUM_RCTL_VALS 19
2191 * Fetch the limit information for a zone. This uses zone_enter() as the
2192 * getrctl(2) system call only returns rctl information for the zone of
2196 zsd_get_zone_caps(zsd_ctl_t
*ctl
, zsd_zone_t
*zone
, uint64_t *cpu_shares
,
2197 uint64_t *cpu_cap
, uint64_t *ram_cap
, uint64_t *locked_cap
,
2198 uint64_t *vm_cap
, uint64_t *processes_cap
, uint64_t *processes
,
2199 uint64_t *lwps_cap
, uint64_t *lwps
, uint64_t *shm_cap
, uint64_t *shm
,
2200 uint64_t *shmids_cap
, uint64_t *shmids
, uint64_t *semids_cap
,
2201 uint64_t *semids
, uint64_t *msgids_cap
, uint64_t *msgids
,
2202 uint64_t *lofi_cap
, uint64_t *lofi
, uint_t
*sched
)
2204 int p
[2], pid
, tmpl_fd
, ret
;
2206 char class[PC_CLNMSZ
];
2207 uint64_t vals
[ZSD_NUM_RCTL_VALS
];
2208 zsd_system_t
*sys
= ctl
->zsctl_system
;
2212 /* Treat all caps as no cap on error */
2213 *cpu_shares
= ZS_LIMIT_NONE
;
2214 *cpu_cap
= ZS_LIMIT_NONE
;
2215 *ram_cap
= ZS_LIMIT_NONE
;
2216 *locked_cap
= ZS_LIMIT_NONE
;
2217 *vm_cap
= ZS_LIMIT_NONE
;
2219 *processes_cap
= ZS_LIMIT_NONE
;
2220 *lwps_cap
= ZS_LIMIT_NONE
;
2221 *shm_cap
= ZS_LIMIT_NONE
;
2222 *shmids_cap
= ZS_LIMIT_NONE
;
2223 *semids_cap
= ZS_LIMIT_NONE
;
2224 *msgids_cap
= ZS_LIMIT_NONE
;
2225 *lofi_cap
= ZS_LIMIT_NONE
;
2235 /* Get the ram cap first since it is a zone attr */
2236 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_PHYS_MCAP
,
2237 ram_cap
, sizeof (*ram_cap
));
2238 if (ret
< 0 || *ram_cap
== 0)
2239 *ram_cap
= ZS_LIMIT_NONE
;
2241 /* Get the zone's default scheduling class */
2242 ret
= zone_getattr(zone
->zsz_id
, ZONE_ATTR_SCHED_CLASS
,
2243 class, sizeof (class));
2247 *sched
= zsd_schedname2int(class, 0);
2249 /* rctl caps must be fetched from within the zone */
2253 if ((tmpl_fd
= init_template()) == -1) {
2260 (void) ct_tmpl_clear(tmpl_fd
);
2267 (void) ct_tmpl_clear(tmpl_fd
);
2268 (void) close(tmpl_fd
);
2270 if (zone
->zsz_id
!= getzoneid()) {
2271 if (zone_enter(zone
->zsz_id
) < 0) {
2277 /* Get caps for zone, and write them to zonestatd parent. */
2278 vals
[i
++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2279 vals
[i
++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2280 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2281 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-swap");
2282 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-processes");
2283 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-processes");
2284 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2285 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-lwps");
2286 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2287 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2288 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2289 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2290 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2291 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2292 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2293 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2294 vals
[i
++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2295 vals
[i
++] = zsd_get_zone_rctl_usage("zone.max-lofi");
2297 if (write(p
[1], vals
, ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) !=
2298 ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) {
2306 if (contract_latest(&ct
) == -1)
2309 (void) ct_tmpl_clear(tmpl_fd
);
2310 (void) close(tmpl_fd
);
2312 while (waitpid(pid
, NULL
, 0) != pid
)
2315 /* Read cap from child in zone */
2316 if (read(p
[0], vals
, ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) !=
2317 ZSD_NUM_RCTL_VALS
* sizeof (uint64_t)) {
2322 *cpu_shares
= vals
[i
++];
2323 *cpu_cap
= vals
[i
++];
2324 *locked_cap
= vals
[i
++];
2325 *vm_cap
= vals
[i
++];
2326 *processes_cap
= vals
[i
++];
2327 *processes
= vals
[i
++];
2328 *lwps_cap
= vals
[i
++];
2330 *shm_cap
= vals
[i
++];
2332 *shmids_cap
= vals
[i
++];
2333 *shmids
= vals
[i
++];
2334 *semids_cap
= vals
[i
++];
2335 *semids
= vals
[i
++];
2336 *msgids_cap
= vals
[i
++];
2337 *msgids
= vals
[i
++];
2338 *lofi_cap
= vals
[i
++];
2341 /* Interpret maximum values as no cap */
2342 if (*cpu_cap
== UINT32_MAX
|| *cpu_cap
== 0)
2343 *cpu_cap
= ZS_LIMIT_NONE
;
2344 if (*processes_cap
== sys
->zss_processes_max
)
2345 *processes_cap
= ZS_LIMIT_NONE
;
2346 if (*lwps_cap
== sys
->zss_lwps_max
)
2347 *lwps_cap
= ZS_LIMIT_NONE
;
2348 if (*shm_cap
== sys
->zss_shm_max
)
2349 *shm_cap
= ZS_LIMIT_NONE
;
2350 if (*shmids_cap
== sys
->zss_shmids_max
)
2351 *shmids_cap
= ZS_LIMIT_NONE
;
2352 if (*semids_cap
== sys
->zss_semids_max
)
2353 *semids_cap
= ZS_LIMIT_NONE
;
2354 if (*msgids_cap
== sys
->zss_msgids_max
)
2355 *msgids_cap
= ZS_LIMIT_NONE
;
2356 if (*lofi_cap
== sys
->zss_lofi_max
)
2357 *lofi_cap
= ZS_LIMIT_NONE
;
2362 (void) ct_tmpl_clear(tmpl_fd
);
2363 (void) close(tmpl_fd
);
2364 (void) contract_abandon_id(ct
);
2369 /* Update the current list of running zones */
2371 zsd_refresh_zones(zsd_ctl_t
*ctl
)
2378 uint64_t cpu_shares
;
2381 uint64_t locked_cap
;
2383 uint64_t processes_cap
;
2389 uint64_t shmids_cap
;
2391 uint64_t semids_cap
;
2393 uint64_t msgids_cap
;
2398 char zonename
[ZS_ZONENAME_MAX
];
2399 char poolname
[ZS_POOLNAME_MAX
];
2400 char psetname
[ZS_PSETNAME_MAX
];
2405 /* Get the current list of running zones */
2407 old
= num
= ctl
->zsctl_zone_ncache
;
2408 (void) zone_list(ctl
->zsctl_zone_cache
, &num
);
2411 if ((cache
= reallocarray(ctl
->zsctl_zone_cache
, num
,
2412 sizeof (zoneid_t
))) != NULL
) {
2413 ctl
->zsctl_zone_ncache
= num
;
2414 ctl
->zsctl_zone_cache
= cache
;
2416 /* Could not allocate to get new zone list. Give up */
2421 zsd_mark_zones_start(ctl
);
2423 for (i
= 0; i
< num
; i
++) {
2425 ret
= getzonenamebyid(ctl
->zsctl_zone_cache
[i
],
2426 zonename
, sizeof (zonename
));
2430 zone
= zsd_lookup_insert_zone(ctl
, zonename
,
2431 ctl
->zsctl_zone_cache
[i
]);
2433 ret
= zone_getattr(ctl
->zsctl_zone_cache
[i
], ZONE_ATTR_FLAGS
,
2434 &flags
, sizeof (flags
));
2438 if (flags
& ZF_NET_EXCL
)
2439 iptype
= ZS_IPTYPE_EXCLUSIVE
;
2441 iptype
= ZS_IPTYPE_SHARED
;
2443 zsd_get_zone_pool_pset(ctl
, zone
, poolname
, sizeof (poolname
),
2444 psetname
, sizeof (psetname
), &cputype
);
2446 if (zsd_get_zone_caps(ctl
, zone
, &cpu_shares
, &cpu_cap
,
2447 &ram_cap
, &locked_cap
, &vm_cap
, &processes_cap
, &processes
,
2448 &lwps_cap
, &lwps
, &shm_cap
, &shm
, &shmids_cap
, &shmids
,
2449 &semids_cap
, &semids
, &msgids_cap
, &msgids
, &lofi_cap
,
2450 &lofi
, &sched
) != 0)
2453 zsd_mark_zone_found(ctl
, zone
, cpu_shares
, cpu_cap
, ram_cap
,
2454 locked_cap
, vm_cap
, processes_cap
, processes
, lwps_cap
,
2455 lwps
, shm_cap
, shm
, shmids_cap
, shmids
, semids_cap
,
2456 semids
, msgids_cap
, msgids
, lofi_cap
, lofi
, poolname
,
2457 psetname
, sched
, cputype
, iptype
);
2461 /* Fetch the details of a process from its psinfo_t */
2463 zsd_get_proc_info(zsd_ctl_t
*ctl
, psinfo_t
*psinfo
, psetid_t
*psetid
,
2464 psetid_t
*prev_psetid
, zoneid_t
*zoneid
, zoneid_t
*prev_zoneid
,
2465 timestruc_t
*delta
, uint_t
*sched
)
2470 /* Get cached data for proc */
2471 proc
= &(ctl
->zsctl_proc_array
[psinfo
->pr_pid
]);
2472 *psetid
= psinfo
->pr_lwp
.pr_bindpset
;
2474 if (proc
->zspr_psetid
== ZS_PSET_ERROR
)
2475 *prev_psetid
= *psetid
;
2477 *prev_psetid
= proc
->zspr_psetid
;
2479 *zoneid
= psinfo
->pr_zoneid
;
2480 if (proc
->zspr_zoneid
== -1)
2481 *prev_zoneid
= *zoneid
;
2483 *prev_zoneid
= proc
->zspr_zoneid
;
2485 TIMESTRUC_DELTA(d
, psinfo
->pr_time
, proc
->zspr_usage
);
2488 *sched
= zsd_schedname2int(psinfo
->pr_lwp
.pr_clname
,
2489 psinfo
->pr_lwp
.pr_pri
);
2491 /* Update cached data for proc */
2492 proc
->zspr_psetid
= psinfo
->pr_lwp
.pr_bindpset
;
2493 proc
->zspr_zoneid
= psinfo
->pr_zoneid
;
2494 proc
->zspr_sched
= *sched
;
2495 proc
->zspr_usage
.tv_sec
= psinfo
->pr_time
.tv_sec
;
2496 proc
->zspr_usage
.tv_nsec
= psinfo
->pr_time
.tv_nsec
;
2497 proc
->zspr_ppid
= psinfo
->pr_ppid
;
2501 * Reset the known cpu usage of a process. This is done after a process
2502 * exits so that if the pid is recycled, data from its previous life is
2506 zsd_flush_proc_info(zsd_proc_t
*proc
)
2508 proc
->zspr_usage
.tv_sec
= 0;
2509 proc
->zspr_usage
.tv_nsec
= 0;
2513 * Open the current extended accounting file. On initialization, open the
2514 * file as the current file to be used. Otherwise, open the file as the
2515 * next file to use of the current file reaches EOF.
2518 zsd_open_exacct(zsd_ctl_t
*ctl
, boolean_t init
)
2520 int ret
, oret
, state
, trys
= 0, flags
;
2524 char path
[MAXPATHLEN
];
2527 * The accounting file is first opened at the tail. Following
2528 * opens to new accounting files are opened at the head.
2530 if (init
== B_TRUE
) {
2531 flags
= EO_NO_VALID_HDR
| EO_TAIL
;
2532 fd
= &ctl
->zsctl_proc_fd
;
2533 eaf
= &ctl
->zsctl_proc_eaf
;
2534 stat
= &ctl
->zsctl_proc_stat
;
2535 opn
= &ctl
->zsctl_proc_open
;
2537 flags
= EO_NO_VALID_HDR
| EO_HEAD
;
2538 fd
= &ctl
->zsctl_proc_fd_next
;
2539 eaf
= &ctl
->zsctl_proc_eaf_next
;
2540 stat
= &ctl
->zsctl_proc_stat_next
;
2541 opn
= &ctl
->zsctl_proc_open_next
;
2547 /* open accounting files for cpu consumption */
2548 ret
= acctctl(AC_STATE_GET
| AC_PROC
, &state
, sizeof (state
));
2550 zsd_warn(gettext("Unable to get process accounting state"));
2553 if (state
!= AC_ON
) {
2556 "Unable to enable process accounting"));
2559 (void) zsd_enable_cpu_stats();
2564 ret
= acctctl(AC_FILE_GET
| AC_PROC
, path
, sizeof (path
));
2566 zsd_warn(gettext("Unable to get process accounting file"));
2570 if ((*fd
= open(path
, O_RDONLY
, 0)) >= 0 &&
2571 (oret
= ea_fdopen(eaf
, *fd
, NULL
, flags
, O_RDONLY
)) == 0)
2572 ret
= fstat(*fd
, stat
);
2574 if (*fd
< 0 || oret
< 0 || ret
< 0) {
2578 * It is possible the accounting file is momentarily unavailable
2579 * because it is being rolled. Try for up to half a second.
2581 * If failure to open accounting file persists, give up.
2584 (void) ea_close(eaf
);
2589 "Unable to open process accounting file"));
2592 /* wait one millisecond */
2594 ts
.tv_nsec
= NANOSEC
/ 1000;
2595 (void) nanosleep(&ts
, NULL
);
2609 * Walk /proc and charge each process to its zone and processor set.
2610 * Then read exacct data for exited processes, and charge them as well.
2613 zsd_refresh_procs(zsd_ctl_t
*ctl
, boolean_t init
)
2616 struct dirent
*dent
, *dresult
;
2619 zsd_proc_t
*proc
, *pproc
, *tmp
, *next
;
2620 list_t pplist
, plist
;
2621 zsd_zone_t
*zone
, *prev_zone
;
2622 zsd_pset_t
*pset
, *prev_pset
;
2623 psetid_t psetid
, prev_psetid
;
2624 zoneid_t zoneid
, prev_zoneid
;
2625 zsd_pset_usage_t
*usage
, *prev_usage
;
2626 char path
[MAXPATHLEN
];
2629 ea_object_t pobject
;
2630 boolean_t hrtime_expired
= B_FALSE
;
2631 struct timeval interval_end
;
2633 timestruc_t delta
, d1
, d2
;
2637 * Get the current accounting file. The current accounting file
2638 * may be different than the file in use, as the accounting file
2639 * may have been rolled, or manually changed by an admin.
2641 ret
= zsd_open_exacct(ctl
, init
);
2643 zsd_warn(gettext("Unable to track process accounting"));
2648 * Mark the current time as the interval end time. Don't track
2649 * processes that exit after this time.
2651 (void) gettimeofday(&interval_end
, NULL
);
2653 dir
= opendir("/proc");
2655 zsd_warn(gettext("Unable to open /proc"));
2659 dent
= ctl
->zsctl_procfs_dent
;
2661 (void) memset(dent
, 0, ctl
->zsctl_procfs_dent_size
);
2663 /* Walk all processes and compute each zone's usage on each pset. */
2664 while (readdir_r(dir
, dent
, &dresult
) == 0) {
2666 if (strcmp(dent
->d_name
, ".") == 0 ||
2667 strcmp(dent
->d_name
, "..") == 0)
2670 (void) snprintf(path
, sizeof (path
), "/proc/%s/psinfo",
2673 fd
= open(path
, O_RDONLY
);
2677 if (read(fd
, &psinfo
, sizeof (psinfo
)) != sizeof (psinfo
)) {
2683 zsd_get_proc_info(ctl
, &psinfo
, &psetid
, &prev_psetid
,
2684 &zoneid
, &prev_zoneid
, &delta
, &sched
);
2686 d1
.tv_sec
= delta
.tv_sec
/ 2;
2687 d1
.tv_nsec
= delta
.tv_nsec
/ 2;
2688 d2
.tv_sec
= (delta
.tv_sec
/ 2) + (delta
.tv_sec
% 2);
2689 d2
.tv_nsec
= (delta
.tv_nsec
/ 2) + (delta
.tv_nsec
% 2);
2691 /* Get the zone and pset this process is running in */
2692 zone
= zsd_lookup_zone_byid(ctl
, zoneid
);
2695 pset
= zsd_lookup_pset_byid(ctl
, psetid
);
2698 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
2703 * Get the usage of the previous zone and pset if they were
2706 if (zoneid
!= prev_zoneid
)
2707 prev_zone
= zsd_lookup_zone_byid(ctl
, prev_zoneid
);
2711 if (psetid
!= prev_psetid
)
2712 prev_pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2717 if (prev_zone
!= NULL
|| prev_pset
!= NULL
) {
2718 if (prev_zone
== NULL
)
2720 if (prev_pset
== NULL
)
2723 prev_usage
= zsd_lookup_insert_usage(ctl
, prev_pset
,
2727 /* Update the usage with the processes info */
2728 if (prev_usage
== NULL
) {
2729 zsd_mark_pset_usage_found(usage
, sched
);
2731 zsd_mark_pset_usage_found(usage
, sched
);
2732 zsd_mark_pset_usage_found(prev_usage
, sched
);
2736 * First time around is just to get a starting point. All
2737 * usages will be zero.
2742 if (prev_usage
== NULL
) {
2743 zsd_add_usage(ctl
, usage
, &delta
);
2745 zsd_add_usage(ctl
, usage
, &d1
);
2746 zsd_add_usage(ctl
, prev_usage
, &d2
);
2749 (void) closedir(dir
);
2752 * No need to collect exited proc data on initialization. Just
2753 * caching the usage of the known processes to get a zero starting
2760 * Add accounting records to account for processes which have
2763 list_create(&plist
, sizeof (zsd_proc_t
),
2764 offsetof(zsd_proc_t
, zspr_next
));
2765 list_create(&pplist
, sizeof (zsd_proc_t
),
2766 offsetof(zsd_proc_t
, zspr_next
));
2771 timestruc_t user
, sys
, proc_usage
;
2775 bzero(&object
, sizeof (object
));
2780 ret
= ea_get_object(&ctl
->zsctl_proc_eaf
, &object
);
2781 if (ret
== EO_ERROR
) {
2782 if (ea_error() == EXR_EOF
) {
2785 struct stat
*stat_next
;
2788 * See if the next accounting file is the
2789 * same as the current accounting file.
2791 stat
= &(ctl
->zsctl_proc_stat
);
2792 stat_next
= &(ctl
->zsctl_proc_stat_next
);
2793 if (stat
->st_ino
== stat_next
->st_ino
&&
2794 stat
->st_dev
== stat_next
->st_dev
) {
2796 * End of current accounting file is
2797 * reached, so finished. Clear EOF
2798 * bit for next time around.
2800 ea_clear(&ctl
->zsctl_proc_eaf
);
2804 * Accounting file has changed. Move
2805 * to current accounting file.
2807 (void) ea_close(&ctl
->zsctl_proc_eaf
);
2809 ctl
->zsctl_proc_fd
=
2810 ctl
->zsctl_proc_fd_next
;
2811 ctl
->zsctl_proc_eaf
=
2812 ctl
->zsctl_proc_eaf_next
;
2813 ctl
->zsctl_proc_stat
=
2814 ctl
->zsctl_proc_stat_next
;
2816 ctl
->zsctl_proc_fd_next
= -1;
2817 ctl
->zsctl_proc_open_next
= 0;
2822 * Other accounting error. Give up on
2828 /* Skip if not a process group */
2829 if ((object
.eo_catalog
& EXT_TYPE_MASK
) != EXT_GROUP
||
2830 (object
.eo_catalog
& EXD_DATA_MASK
) != EXD_GROUP_PROC
) {
2831 (void) ea_free_item(&object
, EUP_ALLOC
);
2835 /* The process group entry should be complete */
2836 while (numfound
< 9) {
2837 bzero(&pobject
, sizeof (pobject
));
2838 ret
= ea_get_object(&ctl
->zsctl_proc_eaf
,
2841 (void) ea_free_item(&object
, EUP_ALLOC
);
2843 "unable to get process accounting data");
2846 /* Next entries should be process data */
2847 if ((pobject
.eo_catalog
& EXT_TYPE_MASK
) ==
2849 (void) ea_free_item(&object
, EUP_ALLOC
);
2850 (void) ea_free_item(&pobject
, EUP_ALLOC
);
2852 "process data of wrong type");
2855 switch (pobject
.eo_catalog
& EXD_DATA_MASK
) {
2857 pid
= pobject
.eo_item
.ei_uint32
;
2858 proc
= &(ctl
->zsctl_proc_array
[pid
]);
2860 * This process should not be currently in
2861 * the list of processes to process.
2863 assert(!list_link_active(&proc
->zspr_next
));
2866 case EXD_PROC_ANCPID
:
2867 ppid
= pobject
.eo_item
.ei_uint32
;
2868 pproc
= &(ctl
->zsctl_proc_array
[ppid
]);
2871 case EXD_PROC_ZONENAME
:
2872 zone
= zsd_lookup_zone(ctl
,
2873 pobject
.eo_item
.ei_string
, -1);
2876 case EXD_PROC_CPU_USER_SEC
:
2878 pobject
.eo_item
.ei_uint64
;
2881 case EXD_PROC_CPU_USER_NSEC
:
2883 pobject
.eo_item
.ei_uint64
;
2886 case EXD_PROC_CPU_SYS_SEC
:
2888 pobject
.eo_item
.ei_uint64
;
2891 case EXD_PROC_CPU_SYS_NSEC
:
2893 pobject
.eo_item
.ei_uint64
;
2896 case EXD_PROC_FINISH_SEC
:
2898 pobject
.eo_item
.ei_uint64
;
2901 case EXD_PROC_FINISH_NSEC
:
2903 pobject
.eo_item
.ei_uint64
;
2907 (void) ea_free_item(&pobject
, EUP_ALLOC
);
2909 (void) ea_free_item(&object
, EUP_ALLOC
);
2910 if (numfound
!= 9) {
2912 "Malformed process accounting entry found"));
2916 if (finish
.tv_sec
> interval_end
.tv_sec
||
2917 (finish
.tv_sec
== interval_end
.tv_sec
&&
2918 finish
.tv_nsec
> (interval_end
.tv_usec
* 1000)))
2919 hrtime_expired
= B_TRUE
;
2922 * Try to identify the zone and pset to which this
2923 * exited process belongs.
2928 /* Save proc info */
2929 proc
->zspr_ppid
= ppid
;
2930 proc
->zspr_zoneid
= zone
->zsz_id
;
2932 prev_psetid
= ZS_PSET_ERROR
;
2936 * The following tries to deduce the processes pset.
2938 * First choose pset and sched using cached value from the
2939 * most recent time the process has been seen.
2941 * pset and sched can change across zone_enter, so make sure
2942 * most recent sighting of this process was in the same
2943 * zone before using most recent known value.
2945 * If there is no known value, use value of processes
2946 * parent. If parent is unknown, walk parents until a known
2949 * If no parent in the zone is found, use the zone's default
2950 * pset and scheduling class.
2952 if (proc
->zspr_psetid
!= ZS_PSET_ERROR
) {
2953 prev_psetid
= proc
->zspr_psetid
;
2954 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2955 sched
= proc
->zspr_sched
;
2956 } else if (pproc
->zspr_zoneid
== zone
->zsz_id
&&
2957 pproc
->zspr_psetid
!= ZS_PSET_ERROR
) {
2958 prev_psetid
= pproc
->zspr_psetid
;
2959 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
2960 sched
= pproc
->zspr_sched
;
2965 * Process or processes parent has never been seen.
2966 * Save to deduce a known parent later.
2969 TIMESTRUC_ADD_TIMESTRUC(proc_usage
, user
);
2970 TIMESTRUC_DELTA(delta
, proc_usage
,
2972 proc
->zspr_usage
= delta
;
2973 list_insert_tail(&plist
, proc
);
2977 /* Add the zone's usage to the pset */
2978 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
2982 zsd_mark_pset_usage_found(usage
, sched
);
2984 /* compute the usage to add for the exited proc */
2986 TIMESTRUC_ADD_TIMESTRUC(proc_usage
, user
);
2987 TIMESTRUC_DELTA(delta
, proc_usage
,
2990 zsd_add_usage(ctl
, usage
, &delta
);
2992 zsd_flush_proc_info(proc
);
2994 if (hrtime_expired
== B_TRUE
)
2998 * close next accounting file.
3000 if (ctl
->zsctl_proc_open_next
) {
3002 &ctl
->zsctl_proc_eaf_next
);
3003 ctl
->zsctl_proc_open_next
= 0;
3004 ctl
->zsctl_proc_fd_next
= -1;
3007 /* For the remaining processes, use pset and sched of a known parent */
3008 proc
= list_head(&plist
);
3009 while (proc
!= NULL
) {
3012 if (next
->zspr_ppid
== 0 || next
->zspr_ppid
== -1) {
3014 * Kernel process, or parent is unknown, skip
3015 * process, remove from process list.
3018 proc
= list_next(&plist
, proc
);
3019 list_link_init(&tmp
->zspr_next
);
3022 pproc
= &(ctl
->zsctl_proc_array
[next
->zspr_ppid
]);
3023 if (pproc
->zspr_zoneid
!= proc
->zspr_zoneid
) {
3025 * Parent in different zone. Save process and
3026 * use zone's default pset and sched below
3029 proc
= list_next(&plist
, proc
);
3030 list_remove(&plist
, tmp
);
3031 list_insert_tail(&pplist
, tmp
);
3034 /* Parent has unknown pset, Search parent's parent */
3035 if (pproc
->zspr_psetid
== ZS_PSET_ERROR
) {
3039 /* Found parent with known pset. Use its info */
3040 proc
->zspr_psetid
= pproc
->zspr_psetid
;
3041 proc
->zspr_sched
= pproc
->zspr_sched
;
3042 next
->zspr_psetid
= pproc
->zspr_psetid
;
3043 next
->zspr_sched
= pproc
->zspr_sched
;
3044 zone
= zsd_lookup_zone_byid(ctl
,
3048 proc
= list_next(&plist
, proc
);
3049 list_remove(&plist
, tmp
);
3050 list_link_init(&tmp
->zspr_next
);
3053 pset
= zsd_lookup_pset_byid(ctl
,
3057 proc
= list_next(&plist
, proc
);
3058 list_remove(&plist
, tmp
);
3059 list_link_init(&tmp
->zspr_next
);
3062 /* Add the zone's usage to the pset */
3063 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
3064 if (usage
== NULL
) {
3066 proc
= list_next(&plist
, proc
);
3067 list_remove(&plist
, tmp
);
3068 list_link_init(&tmp
->zspr_next
);
3071 zsd_mark_pset_usage_found(usage
, proc
->zspr_sched
);
3072 zsd_add_usage(ctl
, usage
, &proc
->zspr_usage
);
3073 zsd_flush_proc_info(proc
);
3075 proc
= list_next(&plist
, proc
);
3076 list_remove(&plist
, tmp
);
3077 list_link_init(&tmp
->zspr_next
);
3082 * Process has never been seen. Using zone info to
3083 * determine pset and scheduling class.
3085 proc
= list_head(&pplist
);
3086 while (proc
!= NULL
) {
3088 zone
= zsd_lookup_zone_byid(ctl
, proc
->zspr_zoneid
);
3091 if (zone
->zsz_psetid
!= ZS_PSET_ERROR
&&
3092 zone
->zsz_psetid
!= ZS_PSET_MULTI
) {
3093 prev_psetid
= zone
->zsz_psetid
;
3094 pset
= zsd_lookup_pset_byid(ctl
, prev_psetid
);
3096 pset
= zsd_lookup_pset(ctl
, zone
->zsz_pset
, -1);
3098 prev_psetid
= pset
->zsp_id
;
3103 sched
= zone
->zsz_scheds
;
3105 * Ignore FX high scheduling class if it is not the
3106 * only scheduling class in the zone.
3108 if (sched
!= ZS_SCHED_FX_60
)
3109 sched
&= (~ZS_SCHED_FX_60
);
3111 * If more than one scheduling class has been found
3112 * in the zone, use zone's default scheduling class for
3115 if ((sched
& (sched
- 1)) != 0)
3116 sched
= zone
->zsz_default_sched
;
3118 /* Add the zone's usage to the pset */
3119 usage
= zsd_lookup_insert_usage(ctl
, pset
, zone
);
3123 zsd_mark_pset_usage_found(usage
, sched
);
3124 zsd_add_usage(ctl
, usage
, &proc
->zspr_usage
);
3127 proc
= list_next(&pplist
, proc
);
3128 zsd_flush_proc_info(tmp
);
3129 list_link_init(&tmp
->zspr_next
);
3134 * Close the next accounting file if we have not transitioned to it
3137 if (ctl
->zsctl_proc_open_next
) {
3138 (void) ea_close(&ctl
->zsctl_proc_eaf_next
);
3139 ctl
->zsctl_proc_open_next
= 0;
3140 ctl
->zsctl_proc_fd_next
= -1;
3145 * getvmusage(2) uses size_t's in the passwd data structure, which differ
3146 * in size for 32bit and 64 bit kernels. Since this is a contracted interface,
3147 * and zonestatd does not necessarily match the kernel's bitness, marshal
3148 * results appropriately.
3151 zsd_getvmusage(zsd_ctl_t
*ctl
, uint_t flags
, time_t age
, zsd_vmusage64_t
*buf
,
3154 zsd_vmusage32_t
*vmu32
;
3155 zsd_vmusage64_t
*vmu64
;
3160 if (ctl
->zsctl_kern_bits
== 32) {
3162 ret
= syscall(SYS_rusagesys
, _RUSAGESYS_GETVMUSAGE
,
3163 flags
, age
, (uintptr_t)buf
, (uintptr_t)&nres32
);
3165 if (ret
== 0 && buf
!= NULL
) {
3167 * An array of vmusage32_t's has been returned.
3168 * Convert it to an array of vmusage64_t's.
3170 vmu32
= (zsd_vmusage32_t
*)buf
;
3171 vmu64
= (zsd_vmusage64_t
*)buf
;
3172 for (i
= nres32
- 1; i
>= 0; i
--) {
3174 vmu64
[i
].vmu_zoneid
= vmu32
[i
].vmu_zoneid
;
3175 vmu64
[i
].vmu_type
= vmu32
[i
].vmu_type
;
3176 vmu64
[i
].vmu_type
= vmu32
[i
].vmu_type
;
3177 vmu64
[i
].vmu_rss_all
= vmu32
[i
].vmu_rss_all
;
3178 vmu64
[i
].vmu_rss_private
=
3179 vmu32
[i
].vmu_rss_private
;
3180 vmu64
[i
].vmu_rss_shared
=
3181 vmu32
[i
].vmu_rss_shared
;
3182 vmu64
[i
].vmu_swap_all
= vmu32
[i
].vmu_swap_all
;
3183 vmu64
[i
].vmu_swap_private
=
3184 vmu32
[i
].vmu_swap_private
;
3185 vmu64
[i
].vmu_swap_shared
=
3186 vmu32
[i
].vmu_swap_shared
;
3192 * kernel is 64 bit, so use 64 bit structures as zonestat
3195 return (syscall(SYS_rusagesys
, _RUSAGESYS_GETVMUSAGE
,
3196 flags
, age
, (uintptr_t)buf
, (uintptr_t)nres
));
3202 * Update the current physical, virtual, and locked memory usage of the
3206 zsd_refresh_memory(zsd_ctl_t
*ctl
, boolean_t init
)
3209 uint64_t phys_total
;
3211 uint64_t phys_zones
;
3212 uint64_t phys_zones_overcount
;
3213 uint64_t phys_zones_extra
;
3214 uint64_t phys_zones_credit
;
3219 uint64_t disk_swap_total
;
3220 uint64_t disk_swap_used
; /* disk swap with contents */
3224 uint64_t arc_size
= 0;
3225 struct anoninfo ani
;
3227 int num_swap_devices
;
3228 struct swaptable
*swt
;
3229 struct swapent
*swent
;
3233 zsd_vmusage64_t
*vmusage
;
3234 uint64_t num_vmusage
;
3243 char kstat_name
[KSTAT_STRLEN
];
3250 sys
= ctl
->zsctl_system
;
3252 /* interrogate swap devices to find the amount of disk swap */
3254 num_swap_devices
= swapctl(SC_GETNSWP
, NULL
);
3256 if (num_swap_devices
== 0) {
3257 sys
->zss_swap_total
= disk_swap_total
= 0;
3258 sys
->zss_swap_used
= disk_swap_used
= 0;
3260 goto disk_swap_done
;
3262 /* see if swap table needs to be larger */
3263 if (num_swap_devices
> ctl
->zsctl_swap_cache_num
) {
3264 swt_size
= sizeof (int) +
3265 (num_swap_devices
* sizeof (struct swapent
)) +
3266 (num_swap_devices
* MAXPATHLEN
);
3267 free(ctl
->zsctl_swap_cache
);
3269 swt
= (struct swaptable
*)malloc(swt_size
);
3272 * Could not allocate to get list of swap devices.
3273 * Just use data from the most recent read, which will
3274 * be zero if this is the first read.
3276 zsd_warn(gettext("Unable to allocate to determine "
3278 disk_swap_total
= sys
->zss_swap_total
;
3279 disk_swap_used
= sys
->zss_swap_used
;
3280 goto disk_swap_done
;
3282 swent
= swt
->swt_ent
;
3283 path
= (char *)swt
+ (sizeof (int) +
3284 num_swap_devices
* sizeof (swapent_t
));
3285 for (i
= 0; i
< num_swap_devices
; i
++, swent
++) {
3286 swent
->ste_path
= path
;
3289 swt
->swt_n
= num_swap_devices
;
3290 ctl
->zsctl_swap_cache
= swt
;
3291 ctl
->zsctl_swap_cache_size
= swt_size
;
3292 ctl
->zsctl_swap_cache_num
= num_swap_devices
;
3294 num_swap_devices
= swapctl(SC_LIST
, ctl
->zsctl_swap_cache
);
3295 if (num_swap_devices
< 0) {
3296 /* More swap devices have arrived */
3297 if (errno
== ENOMEM
)
3298 goto disk_swap_again
;
3300 zsd_warn(gettext("Unable to determine disk swap devices"));
3301 /* Unexpected error. Use existing data */
3302 disk_swap_total
= sys
->zss_swap_total
;
3303 disk_swap_used
= sys
->zss_swap_used
;
3304 goto disk_swap_done
;
3307 /* add up the disk swap */
3308 disk_swap_total
= 0;
3310 swent
= ctl
->zsctl_swap_cache
->swt_ent
;
3311 for (i
= 0; i
< num_swap_devices
; i
++, swent
++) {
3312 disk_swap_total
+= swent
->ste_pages
;
3313 disk_swap_used
+= (swent
->ste_pages
- swent
->ste_free
);
3315 disk_swap_total
*= ctl
->zsctl_pagesize
;
3316 disk_swap_used
*= ctl
->zsctl_pagesize
;
3318 sys
->zss_swap_total
= disk_swap_total
;
3319 sys
->zss_swap_used
= disk_swap_used
;
3323 /* get system pages kstat */
3325 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "unix", 0, "system_pages");
3327 zsd_warn(gettext("Unable to lookup system pages kstat"));
3329 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3332 zsd_warn(gettext("Unable to read system pages kstat"));
3335 knp
= kstat_data_lookup(kstat
, "physmem");
3337 zsd_warn(gettext("Unable to read physmem"));
3339 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3340 physmem
= knp
->value
.ui64
;
3341 else if (knp
->data_type
== KSTAT_DATA_UINT32
)
3342 physmem
= knp
->value
.ui32
;
3346 knp
= kstat_data_lookup(kstat
, "pp_kernel");
3348 zsd_warn(gettext("Unable to read pp_kernel"));
3350 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3351 pp_kernel
= knp
->value
.ui64
;
3352 else if (knp
->data_type
== KSTAT_DATA_UINT32
)
3353 pp_kernel
= knp
->value
.ui32
;
3358 physmem
*= ctl
->zsctl_pagesize
;
3359 pp_kernel
*= ctl
->zsctl_pagesize
;
3361 /* get the zfs arc size if available */
3364 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "zfs", 0, "arcstats");
3366 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3368 knp
= kstat_data_lookup(kstat
, "size");
3370 if (knp
->data_type
== KSTAT_DATA_UINT64
)
3371 arc_size
= knp
->value
.ui64
;
3374 /* Try to get swap information */
3375 if (swapctl(SC_AINFO
, &ani
) < 0) {
3376 zsd_warn(gettext("Unable to get swap info"));
3381 /* getvmusage to get physical memory usage */
3382 vmusage
= ctl
->zsctl_vmusage_cache
;
3383 num_vmusage
= ctl
->zsctl_vmusage_cache_num
;
3385 ret
= zsd_getvmusage(ctl
, VMUSAGE_SYSTEM
| VMUSAGE_ALL_ZONES
, 0,
3386 vmusage
, &num_vmusage
);
3389 /* Unexpected error. Use existing data */
3390 if (errno
!= EOVERFLOW
) {
3392 "Unable to read physical memory usage"));
3393 phys_zones
= sys
->zss_ram_zones
;
3397 /* vmusage results cache too small */
3398 if (num_vmusage
> ctl
->zsctl_vmusage_cache_num
) {
3400 size_t size
= sizeof (zsd_vmusage64_t
) * num_vmusage
;
3402 free(ctl
->zsctl_vmusage_cache
);
3403 vmusage
= (zsd_vmusage64_t
*)malloc(size
);
3404 if (vmusage
== NULL
) {
3405 zsd_warn(gettext("Unable to alloc to determine "
3406 "physical memory usage"));
3407 phys_zones
= sys
->zss_ram_zones
;
3410 ctl
->zsctl_vmusage_cache
= vmusage
;
3411 ctl
->zsctl_vmusage_cache_num
= num_vmusage
;
3415 phys_zones_overcount
= 0;
3417 for (i
= 0; i
< num_vmusage
; i
++) {
3418 switch (vmusage
[i
].vmu_type
) {
3419 case VMUSAGE_SYSTEM
:
3420 /* total pages backing user process mappings */
3421 phys_zones
= sys
->zss_ram_zones
=
3422 vmusage
[i
].vmu_rss_all
;
3426 phys_zones_overcount
+= vmusage
[i
].vmu_rss_all
;
3427 zone
= zsd_lookup_zone_byid(ctl
, vmusage
[i
].vmu_id
);
3429 zone
->zsz_usage_ram
= vmusage
[i
].vmu_rss_all
;
3436 * Figure how much memory was double counted due to text sharing
3437 * between zones. Credit this back so that the sum of the zones
3438 * equals the total zone ram usage;
3440 phys_zones_extra
= phys_zones_overcount
- phys_zones
;
3441 phys_zones_credit
= phys_zones_extra
/ vmu_nzones
;
3445 /* walk the zones to get swap and locked kstats. Fetch ram cap. */
3446 sys
->zss_locked_zones
= 0;
3447 sys
->zss_vm_zones
= 0;
3448 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
3449 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
3451 /* If zone halted during interval, show memory usage as none */
3452 if (zone
->zsz_active
== B_FALSE
||
3453 zone
->zsz_deleted
== B_TRUE
) {
3454 zone
->zsz_usage_ram
= 0;
3455 zone
->zsz_usage_vm
= 0;
3456 zone
->zsz_usage_locked
= 0;
3460 if (phys_zones_credit
> 0) {
3461 if (zone
->zsz_usage_ram
> phys_zones_credit
) {
3462 zone
->zsz_usage_ram
-= phys_zones_credit
;
3466 * Get zone's swap usage. Since zone could have halted,
3467 * treats as zero if cannot read
3469 zone
->zsz_usage_vm
= 0;
3470 (void) snprintf(kstat_name
, sizeof (kstat_name
),
3471 "swapresv_zone_%d", zone
->zsz_id
);
3473 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "caps",
3474 zone
->zsz_id
, kstat_name
);
3476 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3478 knp
= kstat_data_lookup(kstat
, "usage");
3480 knp
->data_type
== KSTAT_DATA_UINT64
) {
3481 zone
->zsz_usage_vm
= knp
->value
.ui64
;
3482 sys
->zss_vm_zones
+= knp
->value
.ui64
;
3486 * Get zone's locked usage. Since zone could have halted,
3487 * treats as zero if cannot read
3489 zone
->zsz_usage_locked
= 0;
3490 (void) snprintf(kstat_name
, sizeof (kstat_name
),
3491 "lockedmem_zone_%d", zone
->zsz_id
);
3493 kstat
= kstat_lookup(ctl
->zsctl_kstat_ctl
, "caps",
3494 zone
->zsz_id
, kstat_name
);
3496 kid
= kstat_read(ctl
->zsctl_kstat_ctl
, kstat
, NULL
);
3498 knp
= kstat_data_lookup(kstat
, "usage");
3500 knp
->data_type
== KSTAT_DATA_UINT64
) {
3501 zone
->zsz_usage_locked
= knp
->value
.ui64
;
3503 * Since locked memory accounting for zones
3504 * can double count ddi locked memory, cap each
3505 * zone's locked usage at its ram usage.
3507 if (zone
->zsz_usage_locked
>
3508 zone
->zsz_usage_ram
)
3509 zone
->zsz_usage_locked
=
3510 zone
->zsz_usage_ram
;
3511 sys
->zss_locked_zones
+=
3512 zone
->zsz_usage_locked
;
3518 sysconf(_SC_PHYS_PAGES
) * ctl
->zsctl_pagesize
;
3520 phys_used
= (sysconf(_SC_PHYS_PAGES
) - sysconf(_SC_AVPHYS_PAGES
))
3521 * ctl
->zsctl_pagesize
;
3523 /* Compute remaining statistics */
3524 sys
->zss_ram_total
= phys_total
;
3525 sys
->zss_ram_zones
= phys_zones
;
3526 sys
->zss_ram_kern
= phys_used
- phys_zones
- arc_size
;
3529 * The total for kernel locked memory should include
3530 * segkp locked pages, but oh well. The arc size is subtracted,
3531 * as that physical memory is reclaimable.
3533 sys
->zss_locked_kern
= pp_kernel
- arc_size
;
3534 /* Add memory used by kernel startup and obp to kernel locked */
3535 if ((phys_total
- physmem
) > 0)
3536 sys
->zss_locked_kern
+= phys_total
- physmem
;
3539 * Add in the portion of (RAM+DISK) that is not available as swap,
3540 * and consider it swap used by the kernel.
3542 sys
->zss_vm_total
= phys_total
+ disk_swap_total
;
3543 vm_free
= (ani
.ani_max
- ani
.ani_resv
) * ctl
->zsctl_pagesize
;
3544 vm_used
= sys
->zss_vm_total
- vm_free
;
3545 sys
->zss_vm_kern
= vm_used
- sys
->zss_vm_zones
- arc_size
;
3549 * Charge each cpu's usage to its processor sets. Also add the cpu's total
3550 * time to each zone using the processor set. This tracks the maximum
3551 * amount of cpu time that a zone could have used.
3554 zsd_refresh_cpu_stats(zsd_ctl_t
*ctl
, boolean_t init
)
3558 zsd_pset_usage_t
*usage
;
3560 zsd_cpu_t
*cpu_next
;
3566 /* Update the per-cpu kstat data */
3567 cpu_next
= list_head(&ctl
->zsctl_cpus
);
3568 while (cpu_next
!= NULL
) {
3570 cpu_next
= list_next(&ctl
->zsctl_cpus
, cpu
);
3571 zsd_update_cpu_stats(ctl
, cpu
);
3573 /* Update the elapsed real time */
3574 hrtime
= gethrtime();
3576 /* first time around, store hrtime for future comparision */
3577 ctl
->zsctl_hrtime
= hrtime
;
3578 ctl
->zsctl_hrtime_prev
= hrtime
;
3581 /* Compute increase in hrtime since the most recent read */
3582 ctl
->zsctl_hrtime_prev
= ctl
->zsctl_hrtime
;
3583 ctl
->zsctl_hrtime
= hrtime
;
3584 if ((hrtime
= hrtime
- ctl
->zsctl_hrtime_prev
) > 0)
3585 TIMESTRUC_ADD_NANOSEC(ctl
->zsctl_hrtime_total
, hrtime
);
3588 /* On initialization, all psets have zero time */
3592 for (pset
= list_head(&ctl
->zsctl_psets
); pset
!= NULL
;
3593 pset
= list_next(&ctl
->zsctl_psets
, pset
)) {
3595 if (pset
->zsp_active
== B_FALSE
) {
3596 zsd_warn(gettext("Internal error,inactive pset found"));
3600 /* sum total used time for pset */
3603 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_intr
);
3604 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_kern
);
3605 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_user
);
3606 /* kernel time in pset is total time minus zone time */
3607 TIMESTRUC_DELTA(pset
->zsp_usage_kern
, ts
,
3608 pset
->zsp_usage_zones
);
3609 if (pset
->zsp_usage_kern
.tv_sec
< 0 ||
3610 pset
->zsp_usage_kern
.tv_nsec
< 0) {
3611 pset
->zsp_usage_kern
.tv_sec
= 0;
3612 pset
->zsp_usage_kern
.tv_nsec
= 0;
3614 /* Total pset elapsed time is used time plus idle time */
3615 TIMESTRUC_ADD_TIMESTRUC(ts
, pset
->zsp_idle
);
3617 TIMESTRUC_DELTA(delta
, ts
, pset
->zsp_total_time
);
3619 for (usage
= list_head(&pset
->zsp_usage_list
); usage
!= NULL
;
3620 usage
= list_next(&pset
->zsp_usage_list
, usage
)) {
3622 zone
= usage
->zsu_zone
;
3623 if (usage
->zsu_cpu_shares
!= ZS_LIMIT_NONE
&&
3624 usage
->zsu_cpu_shares
!= ZS_SHARES_UNLIMITED
&&
3625 usage
->zsu_cpu_shares
!= 0) {
3627 * Figure out how many nanoseconds of share time
3628 * to give to the zone
3630 hrtime
= delta
.tv_sec
;
3632 hrtime
+= delta
.tv_nsec
;
3633 hrtime
*= usage
->zsu_cpu_shares
;
3634 hrtime
/= pset
->zsp_cpu_shares
;
3635 TIMESTRUC_ADD_NANOSEC(zone
->zsz_share_time
,
3638 /* Add pset time to each zone using pset */
3639 TIMESTRUC_ADD_TIMESTRUC(zone
->zsz_pset_time
, delta
);
3641 zone
->zsz_cpus_online
+= pset
->zsp_online
;
3643 pset
->zsp_total_time
= ts
;
3646 for (zone
= list_head(&ctl
->zsctl_zones
); zone
!= NULL
;
3647 zone
= list_next(&ctl
->zsctl_zones
, zone
)) {
3649 /* update cpu cap tracking if the zone has a cpu cap */
3650 if (zone
->zsz_cpu_cap
!= ZS_LIMIT_NONE
) {
3653 elapsed
= ctl
->zsctl_hrtime
- ctl
->zsctl_hrtime_prev
;
3654 elapsed
*= zone
->zsz_cpu_cap
;
3655 elapsed
= elapsed
/ 100;
3656 TIMESTRUC_ADD_NANOSEC(zone
->zsz_cap_time
, elapsed
);
3659 sys
= ctl
->zsctl_system
;
3662 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_intr
);
3663 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_kern
);
3664 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_user
);
3666 /* kernel time in pset is total time minus zone time */
3667 TIMESTRUC_DELTA(sys
->zss_cpu_usage_kern
, ts
,
3668 sys
->zss_cpu_usage_zones
);
3669 if (sys
->zss_cpu_usage_kern
.tv_sec
< 0 ||
3670 sys
->zss_cpu_usage_kern
.tv_nsec
< 0) {
3671 sys
->zss_cpu_usage_kern
.tv_sec
= 0;
3672 sys
->zss_cpu_usage_kern
.tv_nsec
= 0;
3674 /* Total pset elapsed time is used time plus idle time */
3675 TIMESTRUC_ADD_TIMESTRUC(ts
, sys
->zss_idle
);
3676 sys
->zss_cpu_total_time
= ts
;
3680 * Saves current usage data to a cache that is read by libzonestat when
3681 * calling zs_usage_read().
3683 * All pointers in the cached data structure are set to NULL. When
3684 * libzonestat reads the cached data, it will set the pointers relative to
3685 * its address space.
3688 zsd_usage_cache_update(zsd_ctl_t
*ctl
)
3690 zs_usage_cache_t
*cache
;
3691 zs_usage_cache_t
*old
;
3696 zs_zone_t
*zone
= NULL
;
3698 zs_pset_t
*pset
= NULL
;
3700 zs_pset_zone_t
*pusage
;
3701 zsd_pset_usage_t
*dpusage
;
3707 sizeof (zs_usage_cache_t
) +
3708 sizeof (zs_usage_t
) +
3709 sizeof (zs_system_t
) +
3710 sizeof (zs_zone_t
) * ctl
->zsctl_nzones
+
3711 sizeof (zs_pset_t
) * ctl
->zsctl_npsets
+
3712 sizeof (zs_pset_zone_t
) * ctl
->zsctl_npset_usages
;
3714 cache
= (zs_usage_cache_t
*)malloc(size
);
3715 if (cache
== NULL
) {
3716 zsd_warn(gettext("Unable to allocate usage cache\n"));
3720 next
= (char *)cache
;
3721 cache
->zsuc_size
= size
- sizeof (zs_usage_cache_t
);
3722 next
+= sizeof (zs_usage_cache_t
);
3725 usage
= cache
->zsuc_usage
= (zs_usage_t
*)next
;
3726 next
+= sizeof (zs_usage_t
);
3727 usage
->zsu_start
= g_start
;
3728 usage
->zsu_hrstart
= g_hrstart
;
3729 usage
->zsu_time
= g_now
;
3730 usage
->zsu_hrtime
= g_hrnow
;
3731 usage
->zsu_nzones
= ctl
->zsctl_nzones
;
3732 usage
->zsu_npsets
= ctl
->zsctl_npsets
;
3733 usage
->zsu_system
= NULL
;
3736 sys
= (zs_system_t
*)next
;
3737 next
+= sizeof (zs_system_t
);
3738 dsys
= ctl
->zsctl_system
;
3739 sys
->zss_ram_total
= dsys
->zss_ram_total
;
3740 sys
->zss_ram_kern
= dsys
->zss_ram_kern
;
3741 sys
->zss_ram_zones
= dsys
->zss_ram_zones
;
3742 sys
->zss_locked_kern
= dsys
->zss_locked_kern
;
3743 sys
->zss_locked_zones
= dsys
->zss_locked_zones
;
3744 sys
->zss_vm_total
= dsys
->zss_vm_total
;
3745 sys
->zss_vm_kern
= dsys
->zss_vm_kern
;
3746 sys
->zss_vm_zones
= dsys
->zss_vm_zones
;
3747 sys
->zss_swap_total
= dsys
->zss_swap_total
;
3748 sys
->zss_swap_used
= dsys
->zss_swap_used
;
3749 sys
->zss_ncpus
= dsys
->zss_ncpus
;
3750 sys
->zss_ncpus_online
= dsys
->zss_ncpus_online
;
3752 sys
->zss_processes_max
= dsys
->zss_maxpid
;
3753 sys
->zss_lwps_max
= dsys
->zss_lwps_max
;
3754 sys
->zss_shm_max
= dsys
->zss_shm_max
;
3755 sys
->zss_shmids_max
= dsys
->zss_shmids_max
;
3756 sys
->zss_semids_max
= dsys
->zss_semids_max
;
3757 sys
->zss_msgids_max
= dsys
->zss_msgids_max
;
3758 sys
->zss_lofi_max
= dsys
->zss_lofi_max
;
3760 sys
->zss_processes
= dsys
->zss_processes
;
3761 sys
->zss_lwps
= dsys
->zss_lwps
;
3762 sys
->zss_shm
= dsys
->zss_shm
;
3763 sys
->zss_shmids
= dsys
->zss_shmids
;
3764 sys
->zss_semids
= dsys
->zss_semids
;
3765 sys
->zss_msgids
= dsys
->zss_msgids
;
3766 sys
->zss_lofi
= dsys
->zss_lofi
;
3768 sys
->zss_cpu_total_time
= dsys
->zss_cpu_total_time
;
3769 sys
->zss_cpu_usage_zones
= dsys
->zss_cpu_usage_zones
;
3770 sys
->zss_cpu_usage_kern
= dsys
->zss_cpu_usage_kern
;
3772 for (i
= 0, dzone
= list_head(&ctl
->zsctl_zones
);
3773 i
< ctl
->zsctl_nzones
;
3774 i
++, dzone
= list_next(&ctl
->zsctl_zones
, dzone
)) {
3776 zone
= (zs_zone_t
*)next
;
3777 next
+= sizeof (zs_zone_t
);
3778 list_link_init(&zone
->zsz_next
);
3779 zone
->zsz_system
= NULL
;
3781 (void) strlcpy(zone
->zsz_name
, dzone
->zsz_name
,
3782 sizeof (zone
->zsz_name
));
3783 (void) strlcpy(zone
->zsz_pool
, dzone
->zsz_pool
,
3784 sizeof (zone
->zsz_pool
));
3785 (void) strlcpy(zone
->zsz_pset
, dzone
->zsz_pset
,
3786 sizeof (zone
->zsz_pset
));
3787 zone
->zsz_id
= dzone
->zsz_id
;
3788 zone
->zsz_cputype
= dzone
->zsz_cputype
;
3789 zone
->zsz_iptype
= dzone
->zsz_iptype
;
3790 zone
->zsz_start
= dzone
->zsz_start
;
3791 zone
->zsz_hrstart
= dzone
->zsz_hrstart
;
3792 zone
->zsz_scheds
= dzone
->zsz_scheds
;
3793 zone
->zsz_cpu_shares
= dzone
->zsz_cpu_shares
;
3794 zone
->zsz_cpu_cap
= dzone
->zsz_cpu_cap
;
3795 zone
->zsz_ram_cap
= dzone
->zsz_ram_cap
;
3796 zone
->zsz_vm_cap
= dzone
->zsz_vm_cap
;
3797 zone
->zsz_locked_cap
= dzone
->zsz_locked_cap
;
3798 zone
->zsz_cpu_usage
= dzone
->zsz_cpu_usage
;
3799 zone
->zsz_cpus_online
= dzone
->zsz_cpus_online
;
3800 zone
->zsz_pset_time
= dzone
->zsz_pset_time
;
3801 zone
->zsz_cap_time
= dzone
->zsz_cap_time
;
3802 zone
->zsz_share_time
= dzone
->zsz_share_time
;
3803 zone
->zsz_usage_ram
= dzone
->zsz_usage_ram
;
3804 zone
->zsz_usage_locked
= dzone
->zsz_usage_locked
;
3805 zone
->zsz_usage_vm
= dzone
->zsz_usage_vm
;
3807 zone
->zsz_processes_cap
= dzone
->zsz_processes_cap
;
3808 zone
->zsz_lwps_cap
= dzone
->zsz_lwps_cap
;
3809 zone
->zsz_shm_cap
= dzone
->zsz_shm_cap
;
3810 zone
->zsz_shmids_cap
= dzone
->zsz_shmids_cap
;
3811 zone
->zsz_semids_cap
= dzone
->zsz_semids_cap
;
3812 zone
->zsz_msgids_cap
= dzone
->zsz_msgids_cap
;
3813 zone
->zsz_lofi_cap
= dzone
->zsz_lofi_cap
;
3815 zone
->zsz_processes
= dzone
->zsz_processes
;
3816 zone
->zsz_lwps
= dzone
->zsz_lwps
;
3817 zone
->zsz_shm
= dzone
->zsz_shm
;
3818 zone
->zsz_shmids
= dzone
->zsz_shmids
;
3819 zone
->zsz_semids
= dzone
->zsz_semids
;
3820 zone
->zsz_msgids
= dzone
->zsz_msgids
;
3821 zone
->zsz_lofi
= dzone
->zsz_lofi
;
3824 for (i
= 0, dpset
= list_head(&ctl
->zsctl_psets
);
3825 i
< ctl
->zsctl_npsets
;
3826 i
++, dpset
= list_next(&ctl
->zsctl_psets
, dpset
)) {
3828 pset
= (zs_pset_t
*)next
;
3829 next
+= sizeof (zs_pset_t
);
3830 list_link_init(&pset
->zsp_next
);
3831 (void) strlcpy(pset
->zsp_name
, dpset
->zsp_name
,
3832 sizeof (pset
->zsp_name
));
3833 pset
->zsp_id
= dpset
->zsp_id
;
3834 pset
->zsp_cputype
= dpset
->zsp_cputype
;
3835 pset
->zsp_start
= dpset
->zsp_start
;
3836 pset
->zsp_hrstart
= dpset
->zsp_hrstart
;
3837 pset
->zsp_online
= dpset
->zsp_online
;
3838 pset
->zsp_size
= dpset
->zsp_size
;
3839 pset
->zsp_min
= dpset
->zsp_min
;
3840 pset
->zsp_max
= dpset
->zsp_max
;
3841 pset
->zsp_importance
= dpset
->zsp_importance
;
3842 pset
->zsp_scheds
= dpset
->zsp_scheds
;
3843 pset
->zsp_cpu_shares
= dpset
->zsp_cpu_shares
;
3844 pset
->zsp_total_time
= dpset
->zsp_total_time
;
3845 pset
->zsp_usage_kern
= dpset
->zsp_usage_kern
;
3846 pset
->zsp_usage_zones
= dpset
->zsp_usage_zones
;
3847 pset
->zsp_nusage
= dpset
->zsp_nusage
;
3848 /* Add pset usages for pset */
3849 for (j
= 0, dpusage
= list_head(&dpset
->zsp_usage_list
);
3850 j
< dpset
->zsp_nusage
;
3851 j
++, dpusage
= list_next(&dpset
->zsp_usage_list
, dpusage
)) {
3853 pusage
= (zs_pset_zone_t
*)next
;
3854 next
+= sizeof (zs_pset_zone_t
);
3855 /* pointers are computed by client */
3856 pusage
->zspz_pset
= NULL
;
3857 pusage
->zspz_zone
= NULL
;
3858 list_link_init(&pusage
->zspz_next
);
3859 pusage
->zspz_zoneid
= dpusage
->zsu_zone
->zsz_id
;
3860 pusage
->zspz_start
= dpusage
->zsu_start
;
3861 pusage
->zspz_hrstart
= dpusage
->zsu_hrstart
;
3862 pusage
->zspz_hrstart
= dpusage
->zsu_hrstart
;
3863 pusage
->zspz_cpu_shares
= dpusage
->zsu_cpu_shares
;
3864 pusage
->zspz_scheds
= dpusage
->zsu_scheds
;
3865 pusage
->zspz_cpu_usage
= dpusage
->zsu_cpu_usage
;
3869 /* Update the current cache pointer */
3870 (void) mutex_lock(&g_usage_cache_lock
);
3871 old
= g_usage_cache
;
3872 cache
->zsuc_ref
= 1;
3873 cache
->zsuc_gen
= g_gen_next
;
3874 usage
->zsu_gen
= g_gen_next
;
3875 usage
->zsu_size
= size
;
3876 g_usage_cache
= cache
;
3879 if (old
->zsuc_ref
== 0)
3883 /* Wake up any clients that are waiting for this calculation */
3884 if (g_usage_cache_kickers
> 0) {
3885 (void) cond_broadcast(&g_usage_cache_wait
);
3887 (void) mutex_unlock(&g_usage_cache_lock
);
3890 static zs_usage_cache_t
*
3891 zsd_usage_cache_hold_locked()
3893 zs_usage_cache_t
*ret
;
3895 ret
= g_usage_cache
;
3901 zsd_usage_cache_rele(zs_usage_cache_t
*cache
)
3903 (void) mutex_lock(&g_usage_cache_lock
);
3905 if (cache
->zsuc_ref
== 0)
3907 (void) mutex_unlock(&g_usage_cache_lock
);
3910 /* Close the handles held by zsd_open() */
3912 zsd_close(zsd_ctl_t
*ctl
)
3916 zsd_pset_usage_t
*usage
;
3920 if (ctl
->zsctl_kstat_ctl
) {
3921 (void) kstat_close(ctl
->zsctl_kstat_ctl
);
3922 ctl
->zsctl_kstat_ctl
= NULL
;
3924 if (ctl
->zsctl_proc_open
) {
3925 (void) ea_close(&ctl
->zsctl_proc_eaf
);
3926 ctl
->zsctl_proc_open
= 0;
3927 ctl
->zsctl_proc_fd
= -1;
3929 if (ctl
->zsctl_pool_conf
) {
3930 if (ctl
->zsctl_pool_status
== POOL_ENABLED
)
3931 (void) pool_conf_close(ctl
->zsctl_pool_conf
);
3932 ctl
->zsctl_pool_status
= POOL_DISABLED
;
3935 while ((zone
= list_head(&ctl
->zsctl_zones
)) != NULL
) {
3936 list_remove(&ctl
->zsctl_zones
, zone
);
3938 ctl
->zsctl_nzones
--;
3941 while ((pset
= list_head(&ctl
->zsctl_psets
)) != NULL
) {
3942 while ((usage
= list_head(&pset
->zsp_usage_list
))
3944 list_remove(&pset
->zsp_usage_list
, usage
);
3945 ctl
->zsctl_npset_usages
--;
3948 list_remove(&ctl
->zsctl_psets
, pset
);
3950 ctl
->zsctl_npsets
--;
3953 /* Release all cpus being tracked */
3954 while (cpu
= list_head(&ctl
->zsctl_cpus
)) {
3955 list_remove(&ctl
->zsctl_cpus
, cpu
);
3957 bzero(cpu
, sizeof (zsd_cpu_t
));
3959 cpu
->zsc_allocated
= B_FALSE
;
3960 cpu
->zsc_psetid
= ZS_PSET_ERROR
;
3961 cpu
->zsc_psetid_prev
= ZS_PSET_ERROR
;
3964 assert(ctl
->zsctl_npset_usages
== 0);
3965 assert(ctl
->zsctl_npsets
== 0);
3966 assert(ctl
->zsctl_nzones
== 0);
3967 (void) zsd_disable_cpu_stats();
3972 * Update the utilization data for all zones and processor sets.
3975 zsd_read(zsd_ctl_t
*ctl
, boolean_t init
, boolean_t do_memory
)
3977 (void) kstat_chain_update(ctl
->zsctl_kstat_ctl
);
3978 (void) gettimeofday(&(ctl
->zsctl_timeofday
), NULL
);
3980 zsd_refresh_system(ctl
);
3983 * Memory calculation is expensive. Only update it on sample
3986 if (do_memory
== B_TRUE
)
3987 zsd_refresh_memory(ctl
, init
);
3988 zsd_refresh_zones(ctl
);
3989 zsd_refresh_psets(ctl
);
3990 zsd_refresh_procs(ctl
, init
);
3991 zsd_refresh_cpu_stats(ctl
, init
);
3994 * Delete objects that no longer exist.
3995 * Pset usages must be deleted first as they point to zone and
3998 zsd_mark_pset_usages_end(ctl
);
3999 zsd_mark_psets_end(ctl
);
4000 zsd_mark_cpus_end(ctl
);
4001 zsd_mark_zones_end(ctl
);
4004 * Save results for clients.
4006 zsd_usage_cache_update(ctl
);
4009 * Roll process accounting file.
4011 (void) zsd_roll_exacct();
4016 * Get the system rctl, which is the upper most limit
4019 zsd_get_system_rctl(char *name
)
4021 rctlblk_t
*rblk
, *rblk_last
;
4023 rblk
= (rctlblk_t
*)alloca(rctlblk_size());
4024 rblk_last
= (rctlblk_t
*)alloca(rctlblk_size());
4026 if (getrctl(name
, NULL
, rblk_last
, RCTL_FIRST
) != 0)
4027 return (ZS_LIMIT_NONE
);
4029 while (getrctl(name
, rblk_last
, rblk
, RCTL_NEXT
) == 0)
4030 (void) bcopy(rblk
, rblk_last
, rctlblk_size());
4032 return (rctlblk_get_value(rblk_last
));
4036 * Open any necessary subsystems for collecting utilization data,
4037 * allocate and initialize data structures, and get initial utilization.
4040 * ENOMEM out of memory
4041 * EINVAL other error
4044 zsd_open(zsd_ctl_t
*ctl
)
4046 zsd_system_t
*system
;
4048 char path
[MAXPATHLEN
];
4050 struct statvfs svfs
;
4056 if (ctl
== NULL
&& (ctl
= (zsd_ctl_t
*)calloc(1,
4057 sizeof (zsd_ctl_t
))) == NULL
) {
4058 zsd_warn(gettext("Out of Memory"));
4062 ctl
->zsctl_proc_fd
= -1;
4065 if (ctl
->zsctl_kstat_ctl
== NULL
&&
4066 (ctl
->zsctl_kstat_ctl
= kstat_open()) == NULL
) {
4068 zsd_warn(gettext("Unable to open kstats"));
4070 if (errno
!= ENOMEM
)
4076 * These are set when the accounting file is opened by
4077 * zsd_update_procs()
4079 ctl
->zsctl_proc_fd
= -1;
4080 ctl
->zsctl_proc_fd_next
= -1;
4081 ctl
->zsctl_proc_open
= 0;
4082 ctl
->zsctl_proc_open_next
= 0;
4085 (void) zsd_enable_cpu_stats();
4087 /* Create structures to track usage */
4088 if (ctl
->zsctl_system
== NULL
&& (ctl
->zsctl_system
= (zsd_system_t
*)
4089 calloc(1, sizeof (zsd_system_t
))) == NULL
) {
4091 zsd_warn(gettext("Out of Memory"));
4095 system
= ctl
->zsctl_system
;
4096 /* get the kernel bitness to know structure layout for getvmusage */
4097 ret
= sysinfo(SI_ARCHITECTURE_64
, path
, sizeof (path
));
4099 ctl
->zsctl_kern_bits
= 32;
4101 ctl
->zsctl_kern_bits
= 64;
4102 ctl
->zsctl_pagesize
= sysconf(_SC_PAGESIZE
);
4104 size
= sysconf(_SC_CPUID_MAX
);
4105 ctl
->zsctl_maxcpuid
= size
;
4106 if (ctl
->zsctl_cpu_array
== NULL
&& (ctl
->zsctl_cpu_array
=
4107 (zsd_cpu_t
*)calloc(size
+ 1, sizeof (zsd_cpu_t
))) == NULL
) {
4108 zsd_warn(gettext("Out of Memory"));
4112 for (i
= 0; i
<= ctl
->zsctl_maxcpuid
; i
++) {
4113 ctl
->zsctl_cpu_array
[i
].zsc_id
= i
;
4114 ctl
->zsctl_cpu_array
[i
].zsc_allocated
= B_FALSE
;
4115 ctl
->zsctl_cpu_array
[i
].zsc_psetid
= ZS_PSET_ERROR
;
4116 ctl
->zsctl_cpu_array
[i
].zsc_psetid_prev
= ZS_PSET_ERROR
;
4118 if (statvfs("/proc", &svfs
) != 0 ||
4119 strcmp("/proc", svfs
.f_fstr
) != 0) {
4120 zsd_warn(gettext("/proc not a procfs filesystem"));
4125 size
= sysconf(_SC_MAXPID
) + 1;
4126 ctl
->zsctl_maxproc
= size
;
4127 if (ctl
->zsctl_proc_array
== NULL
&&
4128 (ctl
->zsctl_proc_array
= (zsd_proc_t
*)calloc(size
,
4129 sizeof (zsd_proc_t
))) == NULL
) {
4130 zsd_warn(gettext("Out of Memory"));
4134 for (i
= 0; i
<= ctl
->zsctl_maxproc
; i
++) {
4135 list_link_init(&(ctl
->zsctl_proc_array
[i
].zspr_next
));
4136 ctl
->zsctl_proc_array
[i
].zspr_psetid
= ZS_PSET_ERROR
;
4137 ctl
->zsctl_proc_array
[i
].zspr_zoneid
= -1;
4138 ctl
->zsctl_proc_array
[i
].zspr_usage
.tv_sec
= 0;
4139 ctl
->zsctl_proc_array
[i
].zspr_usage
.tv_nsec
= 0;
4140 ctl
->zsctl_proc_array
[i
].zspr_ppid
= -1;
4143 list_create(&ctl
->zsctl_zones
, sizeof (zsd_zone_t
),
4144 offsetof(zsd_zone_t
, zsz_next
));
4146 list_create(&ctl
->zsctl_psets
, sizeof (zsd_pset_t
),
4147 offsetof(zsd_pset_t
, zsp_next
));
4149 list_create(&ctl
->zsctl_cpus
, sizeof (zsd_cpu_t
),
4150 offsetof(zsd_cpu_t
, zsc_next
));
4152 pathmax
= pathconf("/proc", _PC_NAME_MAX
);
4154 zsd_warn(gettext("Unable to determine max path of /proc"));
4158 size
= sizeof (struct dirent
) + pathmax
+ 1;
4160 ctl
->zsctl_procfs_dent_size
= size
;
4161 if (ctl
->zsctl_procfs_dent
== NULL
&&
4162 (ctl
->zsctl_procfs_dent
= (struct dirent
*)calloc(1, size
))
4164 zsd_warn(gettext("Out of Memory"));
4169 if (ctl
->zsctl_pool_conf
== NULL
&&
4170 (ctl
->zsctl_pool_conf
= pool_conf_alloc()) == NULL
) {
4171 zsd_warn(gettext("Out of Memory"));
4175 ctl
->zsctl_pool_status
= POOL_DISABLED
;
4176 ctl
->zsctl_pool_changed
= 0;
4178 if (ctl
->zsctl_pool_vals
[0] == NULL
&&
4179 (ctl
->zsctl_pool_vals
[0] = pool_value_alloc()) == NULL
) {
4180 zsd_warn(gettext("Out of Memory"));
4184 if (ctl
->zsctl_pool_vals
[1] == NULL
&&
4185 (ctl
->zsctl_pool_vals
[1] = pool_value_alloc()) == NULL
) {
4186 zsd_warn(gettext("Out of Memory"));
4190 ctl
->zsctl_pool_vals
[2] = NULL
;
4195 system
->zss_maxpid
= size
= sysconf(_SC_MAXPID
);
4196 system
->zss_processes_max
= zsd_get_system_rctl("zone.max-processes");
4197 system
->zss_lwps_max
= zsd_get_system_rctl("zone.max-lwps");
4198 system
->zss_shm_max
= zsd_get_system_rctl("zone.max-shm-memory");
4199 system
->zss_shmids_max
= zsd_get_system_rctl("zone.max-shm-ids");
4200 system
->zss_semids_max
= zsd_get_system_rctl("zone.max-sem-ids");
4201 system
->zss_msgids_max
= zsd_get_system_rctl("zone.max-msg-ids");
4202 system
->zss_lofi_max
= zsd_get_system_rctl("zone.max-lofi");
4206 if (zsd_read(ctl
, B_TRUE
, B_FALSE
) != 0)
4207 zsd_warn(gettext("Reading zone statistics failed"));
4217 /* Copy utilization data to buffer, filtering data if non-global zone. */
4219 zsd_usage_filter(zoneid_t zid
, zs_usage_cache_t
*cache
, zs_usage_t
*usage
,
4223 zs_system_t
*sys
, *csys
;
4224 zs_zone_t
*zone
, *czone
;
4225 zs_pset_t
*pset
, *cpset
;
4226 zs_pset_zone_t
*pz
, *cpz
, *foundpz
;
4227 size_t size
= 0, csize
= 0;
4228 char *start
, *cstart
;
4232 /* Privileged users in the global zone get everything */
4234 cusage
= cache
->zsuc_usage
;
4235 (void) bcopy(cusage
, usage
, cusage
->zsu_size
);
4239 /* Zones just get their own usage */
4240 cusage
= cache
->zsuc_usage
;
4242 start
= (char *)usage
;
4243 cstart
= (char *)cusage
;
4244 size
+= sizeof (zs_usage_t
);
4245 csize
+= sizeof (zs_usage_t
);
4247 usage
->zsu_start
= cusage
->zsu_start
;
4248 usage
->zsu_hrstart
= cusage
->zsu_hrstart
;
4249 usage
->zsu_time
= cusage
->zsu_time
;
4250 usage
->zsu_hrtime
= cusage
->zsu_hrtime
;
4251 usage
->zsu_gen
= cusage
->zsu_gen
;
4252 usage
->zsu_nzones
= 1;
4253 usage
->zsu_npsets
= 0;
4256 sys
= (zs_system_t
*)(start
+ size
);
4258 csys
= (zs_system_t
*)(cstart
+ csize
);
4259 size
+= sizeof (zs_system_t
);
4260 csize
+= sizeof (zs_system_t
);
4262 /* Save system limits but not usage */
4265 sys
->zss_ncpus_online
= 0;
4268 zone
= (zs_zone_t
*)(start
+ size
);
4270 czone
= (zs_zone_t
*)(cstart
+ csize
);
4271 /* Find the matching zone */
4272 for (i
= 0; i
< cusage
->zsu_nzones
; i
++) {
4273 if (czone
->zsz_id
== zid
) {
4275 size
+= sizeof (zs_zone_t
);
4277 csize
+= sizeof (zs_zone_t
);
4279 czone
= (zs_zone_t
*)(cstart
+ csize
);
4281 sys
->zss_ram_kern
+= (sys
->zss_ram_zones
- zone
->zsz_usage_ram
);
4282 sys
->zss_ram_zones
= zone
->zsz_usage_ram
;
4284 sys
->zss_vm_kern
+= (sys
->zss_vm_zones
- zone
->zsz_usage_vm
);
4285 sys
->zss_vm_zones
= zone
->zsz_usage_vm
;
4287 sys
->zss_locked_kern
+= (sys
->zss_locked_zones
-
4288 zone
->zsz_usage_locked
);
4289 sys
->zss_locked_zones
= zone
->zsz_usage_locked
;
4291 TIMESTRUC_DELTA(delta
, sys
->zss_cpu_usage_zones
, zone
->zsz_cpu_usage
);
4292 TIMESTRUC_ADD_TIMESTRUC(sys
->zss_cpu_usage_kern
, delta
);
4293 sys
->zss_cpu_usage_zones
= zone
->zsz_cpu_usage
;
4296 pset
= (zs_pset_t
*)(start
+ size
);
4298 cpset
= (zs_pset_t
*)(cstart
+ csize
);
4299 for (i
= 0; i
< cusage
->zsu_npsets
; i
++) {
4300 csize
+= sizeof (zs_pset_t
);
4302 cpz
= (zs_pset_zone_t
*)(csize
+ cstart
);
4304 for (j
= 0; j
< cpset
->zsp_nusage
; j
++) {
4305 if (cpz
->zspz_zoneid
== zid
)
4308 csize
+= sizeof (zs_pset_zone_t
);
4310 cpz
= (zs_pset_zone_t
*)(csize
+ cstart
);
4312 if (foundpz
!= NULL
) {
4313 size
+= sizeof (zs_pset_t
);
4315 pz
= (zs_pset_zone_t
*)(start
+ size
);
4316 size
+= sizeof (zs_pset_zone_t
);
4321 TIMESTRUC_DELTA(delta
, pset
->zsp_usage_zones
,
4322 pz
->zspz_cpu_usage
);
4323 TIMESTRUC_ADD_TIMESTRUC(pset
->zsp_usage_kern
, delta
);
4324 pset
->zsp_usage_zones
= pz
->zspz_cpu_usage
;
4325 pset
->zsp_nusage
= 1;
4326 usage
->zsu_npsets
++;
4327 sys
->zss_ncpus
+= pset
->zsp_size
;
4328 sys
->zss_ncpus_online
+= pset
->zsp_online
;
4331 cpset
= (zs_pset_t
*)(cstart
+ csize
);
4333 usage
->zsu_size
= size
;
4337 * Respond to new connections from libzonestat.so. Also respond to zoneadmd,
4338 * which reports new zones.
4342 zsd_server(void *cookie
, char *argp
, size_t arg_size
,
4343 door_desc_t
*dp
, uint_t n_desc
)
4348 const priv_set_t
*eset
;
4350 if (argp
== DOOR_UNREF_DATA
) {
4351 (void) door_return(NULL
, 0, NULL
, 0);
4355 if (arg_size
!= sizeof (cmd
) * 2) {
4356 (void) door_return(NULL
, 0, NULL
, 0);
4364 /* If connection, return door to stat server */
4365 if (cmd
== ZSD_CMD_CONNECT
) {
4367 /* Verify client compilation version */
4368 if (args
[1] != ZS_VERSION
) {
4369 args
[1] = ZSD_STATUS_VERSION_MISMATCH
;
4370 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4373 ucred
= alloca(ucred_size());
4374 /* Verify client permission */
4375 if (door_ucred(&ucred
) != 0) {
4376 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4377 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4381 eset
= ucred_getprivset(ucred
, PRIV_EFFECTIVE
);
4383 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4384 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4387 if (!priv_ismember(eset
, PRIV_PROC_INFO
)) {
4388 args
[1] = ZSD_STATUS_PERMISSION
;
4389 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4393 /* Return stat server door */
4394 args
[1] = ZSD_STATUS_OK
;
4395 door
.d_attributes
= DOOR_DESCRIPTOR
;
4396 door
.d_data
.d_desc
.d_descriptor
= g_stat_door
;
4397 (void) door_return(argp
, sizeof (cmd
) * 2, &door
, 1);
4401 /* Respond to zoneadmd informing zonestatd of a new zone */
4402 if (cmd
== ZSD_CMD_NEW_ZONE
) {
4403 zsd_fattach_zone(args
[1], g_server_door
, B_FALSE
);
4404 (void) door_return(NULL
, 0, NULL
, 0);
4408 args
[1] = ZSD_STATUS_INTERNAL_ERROR
;
4409 (void) door_return(argp
, sizeof (cmd
) * 2, NULL
, 0);
4414 * Respond to libzonestat.so clients with the current utlilzation data.
4418 zsd_stat_server(void *cookie
, char *argp
, size_t arg_size
,
4419 door_desc_t
*dp
, uint_t n_desc
)
4421 uint64_t *args
, cmd
;
4422 zs_usage_cache_t
*cache
;
4429 const priv_set_t
*eset
;
4430 boolean_t is_gz
= B_FALSE
;
4432 /* Tell stat thread there are no more clients */
4433 if (argp
== DOOR_UNREF_DATA
) {
4434 (void) mutex_lock(&g_usage_cache_lock
);
4435 g_hasclient
= B_FALSE
;
4436 (void) cond_signal(&g_usage_cache_kick
);
4437 (void) mutex_unlock(&g_usage_cache_lock
);
4438 (void) door_return(NULL
, 0, NULL
, 0);
4441 if (arg_size
!= sizeof (cmd
) * 2) {
4442 (void) door_return(NULL
, 0, NULL
, 0);
4446 args
= (uint64_t *)argp
;
4448 if (cmd
!= ZSD_CMD_READ
) {
4449 (void) door_return(NULL
, 0, NULL
, 0);
4452 ucred
= alloca(ucred_size());
4453 if (door_ucred(&ucred
) != 0) {
4454 (void) door_return(NULL
, 0, NULL
, 0);
4457 zoneid
= ucred_getzoneid(ucred
);
4459 if (zoneid
== GLOBAL_ZONEID
)
4462 eset
= ucred_getprivset(ucred
, PRIV_EFFECTIVE
);
4464 (void) door_return(NULL
, 0, NULL
, 0);
4467 if (!priv_ismember(eset
, PRIV_PROC_INFO
)) {
4468 (void) door_return(NULL
, 0, NULL
, 0);
4471 (void) mutex_lock(&g_usage_cache_lock
);
4472 g_hasclient
= B_TRUE
;
4475 * Force a new cpu calculation for client. This will force a
4476 * new memory calculation if the memory data is older than the
4479 g_usage_cache_kickers
++;
4480 (void) cond_signal(&g_usage_cache_kick
);
4481 ret
= cond_wait(&g_usage_cache_wait
, &g_usage_cache_lock
);
4482 g_usage_cache_kickers
--;
4483 if (ret
!= 0 && errno
== EINTR
) {
4484 (void) mutex_unlock(&g_usage_cache_lock
);
4486 "Interrupted before writing usage size to client\n"));
4487 (void) door_return(NULL
, 0, NULL
, 0);
4490 cache
= zsd_usage_cache_hold_locked();
4491 if (cache
== NULL
) {
4492 zsd_warn(gettext("Usage cache empty.\n"));
4493 (void) door_return(NULL
, 0, NULL
, 0);
4496 (void) mutex_unlock(&g_usage_cache_lock
);
4498 /* Copy current usage data to stack to send to client */
4499 usage
= (zs_usage_t
*)alloca(cache
->zsuc_size
);
4501 /* Filter out results if caller is non-global zone */
4502 zsd_usage_filter(zoneid
, cache
, usage
, is_gz
);
4504 rvalp
= (void *)usage
;
4505 rvals
= usage
->zsu_size
;
4506 zsd_usage_cache_rele(cache
);
4508 (void) door_return(rvalp
, rvals
, NULL
, 0);
4512 static volatile boolean_t g_quit
;
4516 zonestat_quithandler(int sig
)
4522 * The stat thread generates new utilization data when clients request
4523 * it. It also manages opening and closing the subsystems used to gather
4524 * data depending on if clients exist.
4528 stat_thread(void *arg
)
4533 boolean_t do_memory
;
4539 if (g_quit
== B_TRUE
)
4541 zsd_warn(gettext("Unable to fetch current time"));
4546 next_memory
= start
;
4547 while (g_quit
== B_FALSE
) {
4550 * These are used to decide if the most recent memory
4551 * calculation was within a sample interval,
4552 * and weather or not the usage collection needs to
4553 * be opened or closed.
4555 do_memory
= B_FALSE
;
4560 * If all clients have gone, close usage collecting
4562 (void) mutex_lock(&g_usage_cache_lock
);
4563 if (!g_hasclient
&& g_open
== B_TRUE
) {
4565 (void) mutex_unlock(&g_usage_cache_lock
);
4568 if (g_quit
== B_TRUE
) {
4569 (void) mutex_unlock(
4570 &g_usage_cache_lock
);
4574 * Wait for a usage data request
4576 if (g_usage_cache_kickers
== 0) {
4577 (void) cond_wait(&g_usage_cache_kick
,
4578 &g_usage_cache_lock
);
4582 if (g_quit
== B_TRUE
) {
4583 (void) mutex_unlock(
4584 &g_usage_cache_lock
);
4588 (void) mutex_unlock(&g_usage_cache_lock
);
4590 "Unable to fetch current time"));
4595 if (now
>= next_memory
) {
4597 next_memory
= now
+ g_interval
;
4602 (void) mutex_unlock(&g_usage_cache_lock
);
4603 if (do_read
|| do_close
)
4607 g_hrnow
= gethrtime();
4608 if (g_hasclient
&& g_open
== B_FALSE
) {
4610 g_hrstart
= g_hrnow
;
4611 g_ctl
= zsd_open(g_ctl
);
4614 "Unable to open zone statistics"));
4618 if (do_read
&& g_ctl
) {
4619 if (zsd_read(g_ctl
, B_FALSE
, do_memory
) != 0) {
4621 "Unable to read zone statistics"));
4626 (void) mutex_lock(&g_usage_cache_lock
);
4627 if (!g_hasclient
&& g_open
== B_TRUE
&& g_ctl
) {
4628 (void) mutex_unlock(&g_usage_cache_lock
);
4632 (void) mutex_unlock(&g_usage_cache_lock
);
4639 (void) thr_kill(g_main
, SIGINT
);
4650 (void) strlcpy(pcinfo
.pc_clname
, "FX", sizeof (pcinfo
.pc_clname
));
4651 if (priocntl(0, 0, PC_GETCID
, (caddr_t
)&pcinfo
) == -1) {
4652 zsd_warn(gettext("cannot get FX class parameters"));
4655 pcparms
.pc_cid
= pcinfo
.pc_cid
;
4656 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_upri
= 60;
4657 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_uprilim
= 60;
4658 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_tqsecs
= 0;
4659 ((fxparms_t
*)pcparms
.pc_clparms
)->fx_tqnsecs
= FX_NOCHANGE
;
4660 if (priocntl(P_PID
, getpid(), PC_SETPARMS
, (caddr_t
)&pcparms
) == -1)
4661 zsd_warn(gettext("cannot enter the FX class"));
4667 daemonize_ready(char status
)
4670 * wake the parent with a clue
4672 (void) write(pipe_fd
, &status
, 1);
4673 (void) close(pipe_fd
);
4677 daemonize_start(void)
4688 if (pipe(filedes
) < 0)
4691 (void) fflush(NULL
);
4693 if ((pid
= fork1()) < 0)
4700 struct sigaction act
;
4702 act
.sa_sigaction
= SIG_DFL
;
4703 (void) sigemptyset(&act
.sa_mask
);
4706 (void) sigaction(SIGPIPE
, &act
, NULL
); /* ignore SIGPIPE */
4708 (void) close(filedes
[1]);
4709 if (read(filedes
[0], &data
, 1) == 1) {
4710 /* forward ready code via exit status */
4714 (void) wait4(pid
, &status
, 0, NULL
);
4715 /* daemon process exited before becoming ready */
4716 if (WIFEXITED(status
)) {
4717 /* assume daemon process printed useful message */
4718 exit(WEXITSTATUS(status
));
4720 zsd_warn(gettext("daemon process killed or died"));
4728 pipe_fd
= filedes
[1];
4729 (void) close(filedes
[0]);
4732 * generic Unix setup
4741 fattach_all_zones(boolean_t detach_only
)
4744 uint_t nzids
, nzids_last
;
4748 (void) zone_list(NULL
, &nzids
);
4750 zids
= (zoneid_t
*)malloc(sizeof (zoneid_t
) * nzids_last
);
4752 zsd_error(gettext("Out of memory"));
4754 (void) zone_list(zids
, &nzids
);
4755 if (nzids
> nzids_last
) {
4759 for (i
= 0; i
< nzids
; i
++)
4760 zsd_fattach_zone(zids
[i
], g_server_door
, detach_only
);
4766 main(int argc
, char *argv
[])
4771 scf_simple_prop_t
*prop
;
4772 uint64_t *intervalp
;
4773 boolean_t opt_cleanup
= B_FALSE
;
4775 g_main
= thr_self();
4777 (void) signal(SIGINT
, zonestat_quithandler
);
4778 (void) signal(SIGTERM
, zonestat_quithandler
);
4779 (void) signal(SIGHUP
, zonestat_quithandler
);
4780 /* (void) sigignore(SIGCHLD); */
4781 (void) sigignore(SIGPIPE
);
4783 if (getzoneid() != GLOBAL_ZONEID
)
4784 zsd_error(gettext("Must be run from global zone only"));
4786 while ((arg
= getopt(argc
, argv
, "c"))
4790 opt_cleanup
= B_TRUE
;
4793 zsd_error(gettext("Invalid option"));
4798 if (zsd_disable_cpu_stats() != 0)
4804 /* Get the configured sample interval */
4805 prop
= scf_simple_prop_get(NULL
, "svc:/system/zones-monitoring:default",
4806 "config", "sample_interval");
4808 zsd_error(gettext("Unable to fetch SMF property "
4809 "\"config/sample_interval\""));
4811 if (scf_simple_prop_type(prop
) != SCF_TYPE_COUNT
)
4812 zsd_error(gettext("Malformed SMF property "
4813 "\"config/sample_interval\". Must be of type \"count\""));
4815 intervalp
= scf_simple_prop_next_count(prop
);
4816 g_interval
= *intervalp
;
4817 if (g_interval
== 0)
4818 zsd_error(gettext("Malformed SMF property "
4819 "\"config/sample_interval\". Must be greater than zero"));
4821 scf_simple_prop_free(prop
);
4823 if (daemonize_start() < 0)
4824 zsd_error(gettext("Unable to start daemon\n"));
4826 /* Run at high priority */
4829 (void) mutex_init(&g_usage_cache_lock
, USYNC_THREAD
, NULL
);
4830 (void) cond_init(&g_usage_cache_kick
, USYNC_THREAD
, NULL
);
4831 (void) cond_init(&g_usage_cache_wait
, USYNC_THREAD
, NULL
);
4833 g_server_door
= door_create(zsd_server
, NULL
,
4834 DOOR_REFUSE_DESC
| DOOR_NO_CANCEL
);
4835 if (g_server_door
< 0)
4836 zsd_error(gettext("Unable to create server door\n"));
4839 g_stat_door
= door_create(zsd_stat_server
, NULL
, DOOR_UNREF_MULTI
|
4840 DOOR_REFUSE_DESC
| DOOR_NO_CANCEL
);
4841 if (g_stat_door
< 0)
4842 zsd_error(gettext("Unable to create statistics door\n"));
4844 fattach_all_zones(B_FALSE
);
4846 if (thr_create(NULL
, 0, stat_thread
, NULL
, 0, &tid
) != 0)
4847 zsd_error(gettext("Unable to create statistics thread\n"));
4851 /* Wait for signal to quit */
4852 while (g_quit
== B_FALSE
)
4856 fattach_all_zones(B_TRUE
);
4858 (void) door_revoke(g_server_door
);
4859 (void) door_revoke(g_stat_door
);
4861 /* kick stat thread and wait for it to close the statistics */
4862 (void) mutex_lock(&g_usage_cache_lock
);
4864 (void) cond_signal(&g_usage_cache_kick
);
4865 (void) mutex_unlock(&g_usage_cache_lock
);
4867 (void) thr_join(tid
, NULL
, NULL
);