4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014, Joyent, Inc. All rights reserved.
27 * Kernel statistics framework
30 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/t_lock.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
38 #include <sys/sysmacros.h>
39 #include <sys/cmn_err.h>
40 #include <sys/kstat.h>
41 #include <sys/sysinfo.h>
42 #include <sys/cpuvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
49 #include <sys/debug.h>
52 #include <sys/pool_pset.h>
53 #include <sys/cpupart.h>
55 #include <sys/loadavg.h>
58 #include <vm/seg_kmem.h>
61 * Global lock to protect the AVL trees and kstat_chain_id.
63 static kmutex_t kstat_chain_lock
;
/*
 * Every install/delete kstat bumps kstat_chain_id.  This is used by:
 *
 * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
 *
 * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
 *	/dev/kstat uses the KID as a cookie for kstat lookups.
 *
 * We reserve the first two IDs because some kstats are created before
 * the well-known ones (kstat_headers = 0, kstat_types = 1).
 *
 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
 * into a particular kstat, which is logically equivalent to a kstat being
 * installed or deleted.
 */
kid_t kstat_chain_id = 2;
/*
 * As far as zones are concerned, there are 3 types of kstat:
 *
 * 1) Those which have a well-known name, and which should return per-zone data
 * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
 * is an example of this type of kstat.
 *
 * 2) Those which should only be exported to a particular list of zones.
 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
 * able to see NFS mounts associated with zone B, while we want the
 * global zone to be able to see all mounts on the system.
 *
 * 3) Those that can be exported to all zones.  Most system-related
 * kstats fall within this category.
 *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 *
 * Writing to kstats is currently disallowed from within a non-global
 * zone, although this restriction could be removed in the future.
 */
108 typedef struct kstat_zone
{
110 struct kstat_zone
*next
;
114 * Extended kstat structure -- for internal use only.
116 typedef struct ekstat
{
117 kstat_t e_ks
; /* the kstat itself */
118 size_t e_size
; /* total allocation size */
119 kthread_t
*e_owner
; /* thread holding this kstat */
120 kcondvar_t e_cv
; /* wait for owner == NULL */
121 avl_node_t e_avl_bykid
; /* AVL tree to sort by KID */
122 avl_node_t e_avl_byname
; /* AVL tree to sort by name */
123 kstat_zone_t e_zone
; /* zone to export stats to */
126 static uint64_t kstat_initial
[8192];
127 static void *kstat_initial_ptr
= kstat_initial
;
128 static size_t kstat_initial_avail
= sizeof (kstat_initial
);
129 static vmem_t
*kstat_arena
;
131 #define KSTAT_ALIGN (sizeof (uint64_t))
133 static avl_tree_t kstat_avl_bykid
;
134 static avl_tree_t kstat_avl_byname
;
137 * Various pointers we need to create kstats at boot time in kstat_init()
139 extern kstat_named_t
*segmapcnt_ptr
;
140 extern uint_t segmapcnt_ndata
;
141 extern int segmap_kstat_update(kstat_t
*, int);
142 extern kstat_named_t
*biostats_ptr
;
143 extern uint_t biostats_ndata
;
144 extern kstat_named_t
*pollstats_ptr
;
145 extern uint_t pollstats_ndata
;
149 extern time_t boot_time
;
150 extern sysinfo_t sysinfo
;
151 extern vminfo_t vminfo
;
156 kstat_named_t deficit
;
157 kstat_named_t clk_intr
;
160 kstat_named_t avenrun_1min
;
161 kstat_named_t avenrun_5min
;
162 kstat_named_t avenrun_15min
;
163 kstat_named_t boot_time
;
164 } system_misc_kstat
= {
165 { "ncpus", KSTAT_DATA_UINT32
},
166 { "lbolt", KSTAT_DATA_UINT32
},
167 { "deficit", KSTAT_DATA_UINT32
},
168 { "clk_intr", KSTAT_DATA_UINT32
},
169 { "vac", KSTAT_DATA_UINT32
},
170 { "nproc", KSTAT_DATA_UINT32
},
171 { "avenrun_1min", KSTAT_DATA_UINT32
},
172 { "avenrun_5min", KSTAT_DATA_UINT32
},
173 { "avenrun_15min", KSTAT_DATA_UINT32
},
174 { "boot_time", KSTAT_DATA_UINT32
},
178 kstat_named_t physmem
;
179 kstat_named_t nalloc
;
181 kstat_named_t nalloc_calls
;
182 kstat_named_t nfree_calls
;
183 kstat_named_t kernelbase
;
184 kstat_named_t econtig
;
185 kstat_named_t freemem
;
186 kstat_named_t availrmem
;
187 kstat_named_t lotsfree
;
188 kstat_named_t desfree
;
189 kstat_named_t minfree
;
190 kstat_named_t fastscan
;
191 kstat_named_t slowscan
;
193 kstat_named_t desscan
;
194 kstat_named_t pp_kernel
;
195 kstat_named_t pagesfree
;
196 kstat_named_t pageslocked
;
197 kstat_named_t pagestotal
;
198 } system_pages_kstat
= {
199 { "physmem", KSTAT_DATA_ULONG
},
200 { "nalloc", KSTAT_DATA_ULONG
},
201 { "nfree", KSTAT_DATA_ULONG
},
202 { "nalloc_calls", KSTAT_DATA_ULONG
},
203 { "nfree_calls", KSTAT_DATA_ULONG
},
204 { "kernelbase", KSTAT_DATA_ULONG
},
205 { "econtig", KSTAT_DATA_ULONG
},
206 { "freemem", KSTAT_DATA_ULONG
},
207 { "availrmem", KSTAT_DATA_ULONG
},
208 { "lotsfree", KSTAT_DATA_ULONG
},
209 { "desfree", KSTAT_DATA_ULONG
},
210 { "minfree", KSTAT_DATA_ULONG
},
211 { "fastscan", KSTAT_DATA_ULONG
},
212 { "slowscan", KSTAT_DATA_ULONG
},
213 { "nscan", KSTAT_DATA_ULONG
},
214 { "desscan", KSTAT_DATA_ULONG
},
215 { "pp_kernel", KSTAT_DATA_ULONG
},
216 { "pagesfree", KSTAT_DATA_ULONG
},
217 { "pageslocked", KSTAT_DATA_ULONG
},
218 { "pagestotal", KSTAT_DATA_ULONG
},
221 static int header_kstat_update(kstat_t
*, int);
222 static int header_kstat_snapshot(kstat_t
*, void *, int);
223 static int system_misc_kstat_update(kstat_t
*, int);
224 static int system_pages_kstat_update(kstat_t
*, int);
227 char name
[KSTAT_STRLEN
];
231 } kstat_data_type
[KSTAT_NUM_TYPES
] = {
232 { "raw", 1, 0, INT_MAX
},
233 { "name=value", sizeof (kstat_named_t
), 0, INT_MAX
},
234 { "interrupt", sizeof (kstat_intr_t
), 1, 1 },
235 { "i/o", sizeof (kstat_io_t
), 1, 1 },
236 { "event_timer", sizeof (kstat_timer_t
), 0, INT_MAX
},
240 kstat_zone_find(kstat_t
*k
, zoneid_t zoneid
)
242 ekstat_t
*e
= (ekstat_t
*)k
;
245 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
246 for (kz
= &e
->e_zone
; kz
!= NULL
; kz
= kz
->next
) {
247 if (zoneid
== ALL_ZONES
|| kz
->zoneid
== ALL_ZONES
)
249 if (zoneid
== kz
->zoneid
)
256 kstat_zone_remove(kstat_t
*k
, zoneid_t zoneid
)
258 ekstat_t
*e
= (ekstat_t
*)k
;
259 kstat_zone_t
*kz
, *t
= NULL
;
261 mutex_enter(&kstat_chain_lock
);
262 if (zoneid
== e
->e_zone
.zoneid
) {
265 e
->e_zone
.zoneid
= kz
->zoneid
;
266 e
->e_zone
.next
= kz
->next
;
269 for (kz
= &e
->e_zone
; kz
->next
!= NULL
; kz
= kz
->next
) {
270 if (kz
->next
->zoneid
== zoneid
) {
276 ASSERT(t
!= NULL
); /* we removed something */
280 mutex_exit(&kstat_chain_lock
);
281 kmem_free(kz
, sizeof (*kz
));
285 kstat_zone_add(kstat_t
*k
, zoneid_t zoneid
)
287 ekstat_t
*e
= (ekstat_t
*)k
;
290 kz
= kmem_alloc(sizeof (*kz
), KM_NOSLEEP
);
293 mutex_enter(&kstat_chain_lock
);
295 kz
->next
= e
->e_zone
.next
;
298 mutex_exit(&kstat_chain_lock
);
/*
 * Compare the list of zones for the given kstats, returning 0 if they match
 * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
 * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 * two lists always has one element, and this is an O(n) operation rather than
 * O(n^2).
 */
309 kstat_zone_compare(ekstat_t
*e1
, ekstat_t
*e2
)
311 kstat_zone_t
*kz1
, *kz2
;
313 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
314 for (kz1
= &e1
->e_zone
; kz1
!= NULL
; kz1
= kz1
->next
) {
315 for (kz2
= &e2
->e_zone
; kz2
!= NULL
; kz2
= kz2
->next
) {
316 if (kz1
->zoneid
== ALL_ZONES
||
317 kz2
->zoneid
== ALL_ZONES
)
319 if (kz1
->zoneid
== kz2
->zoneid
)
323 return (e1
->e_zone
.zoneid
< e2
->e_zone
.zoneid
? -1 : 1);
327 * Support for keeping kstats sorted in AVL trees for fast lookups.
330 kstat_compare_bykid(const void *a1
, const void *a2
)
332 const kstat_t
*k1
= a1
;
333 const kstat_t
*k2
= a2
;
335 if (k1
->ks_kid
< k2
->ks_kid
)
337 if (k1
->ks_kid
> k2
->ks_kid
)
339 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
343 kstat_compare_byname(const void *a1
, const void *a2
)
345 const kstat_t
*k1
= a1
;
346 const kstat_t
*k2
= a2
;
349 s
= strcmp(k1
->ks_module
, k2
->ks_module
);
355 if (k1
->ks_instance
< k2
->ks_instance
)
357 if (k1
->ks_instance
> k2
->ks_instance
)
360 s
= strcmp(k1
->ks_name
, k2
->ks_name
);
366 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
370 kstat_hold(avl_tree_t
*t
, ekstat_t
*template)
375 mutex_enter(&kstat_chain_lock
);
377 ksp
= avl_find(t
, template, NULL
);
381 if (e
->e_owner
== NULL
) {
382 e
->e_owner
= curthread
;
385 cv_wait(&e
->e_cv
, &kstat_chain_lock
);
387 mutex_exit(&kstat_chain_lock
);
392 kstat_rele(kstat_t
*ksp
)
394 ekstat_t
*e
= (ekstat_t
*)ksp
;
396 mutex_enter(&kstat_chain_lock
);
397 ASSERT(e
->e_owner
== curthread
);
399 cv_broadcast(&e
->e_cv
);
400 mutex_exit(&kstat_chain_lock
);
404 kstat_hold_bykid(kid_t kid
, zoneid_t zoneid
)
409 e
.e_zone
.zoneid
= zoneid
;
410 e
.e_zone
.next
= NULL
;
412 return (kstat_hold(&kstat_avl_bykid
, &e
));
416 kstat_hold_byname(const char *ks_module
, int ks_instance
, const char *ks_name
,
421 kstat_set_string(e
.e_ks
.ks_module
, ks_module
);
422 e
.e_ks
.ks_instance
= ks_instance
;
423 kstat_set_string(e
.e_ks
.ks_name
, ks_name
);
424 e
.e_zone
.zoneid
= ks_zoneid
;
425 e
.e_zone
.next
= NULL
;
426 return (kstat_hold(&kstat_avl_byname
, &e
));
430 kstat_alloc(size_t size
)
434 size
= P2ROUNDUP(sizeof (ekstat_t
) + size
, KSTAT_ALIGN
);
436 if (kstat_arena
== NULL
) {
437 if (size
<= kstat_initial_avail
) {
438 e
= kstat_initial_ptr
;
439 kstat_initial_ptr
= (char *)kstat_initial_ptr
+ size
;
440 kstat_initial_avail
-= size
;
443 e
= vmem_alloc(kstat_arena
, size
, VM_NOSLEEP
);
449 cv_init(&e
->e_cv
, NULL
, CV_DEFAULT
, NULL
);
456 kstat_free(ekstat_t
*e
)
458 cv_destroy(&e
->e_cv
);
459 vmem_free(kstat_arena
, e
, e
->e_size
);
463 * Create various system kstats.
470 avl_tree_t
*t
= &kstat_avl_bykid
;
473 * Set up the kstat vmem arena.
475 kstat_arena
= vmem_create("kstat",
476 kstat_initial
, sizeof (kstat_initial
), KSTAT_ALIGN
,
477 segkmem_alloc
, segkmem_free
, heap_arena
, 0, VM_SLEEP
);
480 * Make initial kstats appear as though they were allocated.
482 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
))
483 (void) vmem_xalloc(kstat_arena
, e
->e_size
, KSTAT_ALIGN
,
484 0, 0, e
, (char *)e
+ e
->e_size
,
485 VM_NOSLEEP
| VM_BESTFIT
| VM_PANIC
);
488 * The mother of all kstats. The first kstat in the system, which
489 * always has KID 0, has the headers for all kstats (including itself)
490 * as its data. Thus, the kstat driver does not need any special
491 * interface to extract the kstat chain.
494 ksp
= kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW
,
495 0, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_VAR_SIZE
);
497 ksp
->ks_lock
= &kstat_chain_lock
;
498 ksp
->ks_update
= header_kstat_update
;
499 ksp
->ks_snapshot
= header_kstat_snapshot
;
502 panic("cannot create kstat 'kstat_headers'");
505 ksp
= kstat_create("unix", 0, "kstat_types", "kstat",
506 KSTAT_TYPE_NAMED
, KSTAT_NUM_TYPES
, 0);
509 kstat_named_t
*kn
= KSTAT_NAMED_PTR(ksp
);
511 for (i
= 0; i
< KSTAT_NUM_TYPES
; i
++) {
512 kstat_named_init(&kn
[i
], kstat_data_type
[i
].name
,
519 ksp
= kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW
,
520 sizeof (sysinfo_t
), KSTAT_FLAG_VIRTUAL
);
522 ksp
->ks_data
= (void *) &sysinfo
;
526 ksp
= kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW
,
527 sizeof (vminfo_t
), KSTAT_FLAG_VIRTUAL
);
529 ksp
->ks_data
= (void *) &vminfo
;
533 ksp
= kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED
,
534 segmapcnt_ndata
, KSTAT_FLAG_VIRTUAL
);
536 ksp
->ks_data
= (void *) segmapcnt_ptr
;
537 ksp
->ks_update
= segmap_kstat_update
;
541 ksp
= kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED
,
542 biostats_ndata
, KSTAT_FLAG_VIRTUAL
);
544 ksp
->ks_data
= (void *) biostats_ptr
;
548 ksp
= kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW
,
549 sizeof (struct var
), KSTAT_FLAG_VIRTUAL
);
551 ksp
->ks_data
= (void *) &v
;
555 ksp
= kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED
,
556 sizeof (system_misc_kstat
) / sizeof (kstat_named_t
),
559 ksp
->ks_data
= (void *) &system_misc_kstat
;
560 ksp
->ks_update
= system_misc_kstat_update
;
564 ksp
= kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED
,
565 sizeof (system_pages_kstat
) / sizeof (kstat_named_t
),
568 ksp
->ks_data
= (void *) &system_pages_kstat
;
569 ksp
->ks_update
= system_pages_kstat_update
;
573 ksp
= kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED
,
574 pollstats_ndata
, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
);
577 ksp
->ks_data
= pollstats_ptr
;
583 * Caller of this should ensure that the string pointed by src
584 * doesn't change while kstat's lock is held. Not doing so defeats
585 * kstat's snapshot strategy as explained in <sys/kstat.h>
588 kstat_named_setstr(kstat_named_t
*knp
, const char *src
)
590 if (knp
->data_type
!= KSTAT_DATA_STRING
)
591 panic("kstat_named_setstr('%p', '%p'): "
592 "named kstat is not of type KSTAT_DATA_STRING",
593 (void *)knp
, (void *)src
);
595 KSTAT_NAMED_STR_PTR(knp
) = (char *)src
;
597 KSTAT_NAMED_STR_BUFLEN(knp
) = strlen(src
) + 1;
599 KSTAT_NAMED_STR_BUFLEN(knp
) = 0;
603 kstat_set_string(char *dst
, const char *src
)
605 bzero(dst
, KSTAT_STRLEN
);
606 (void) strncpy(dst
, src
, KSTAT_STRLEN
- 1);
610 kstat_named_init(kstat_named_t
*knp
, const char *name
, uchar_t data_type
)
612 kstat_set_string(knp
->name
, name
);
613 knp
->data_type
= data_type
;
615 if (data_type
== KSTAT_DATA_STRING
)
616 kstat_named_setstr(knp
, NULL
);
620 kstat_timer_init(kstat_timer_t
*ktp
, const char *name
)
622 kstat_set_string(ktp
->name
, name
);
627 default_kstat_update(kstat_t
*ksp
, int rw
)
634 * Named kstats with variable-length long strings have a standard
635 * way of determining how much space is needed to hold the snapshot:
637 if (ksp
->ks_data
!= NULL
&& ksp
->ks_type
== KSTAT_TYPE_NAMED
&&
638 (ksp
->ks_flags
& KSTAT_FLAG_VAR_SIZE
)) {
641 * Add in the space required for the strings
643 knp
= KSTAT_NAMED_PTR(ksp
);
644 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
645 if (knp
->data_type
== KSTAT_DATA_STRING
)
646 len
+= KSTAT_NAMED_STR_BUFLEN(knp
);
649 ksp
->ks_ndata
* sizeof (kstat_named_t
) + len
;
655 default_kstat_snapshot(kstat_t
*ksp
, void *buf
, int rw
)
661 ksp
->ks_snaptime
= cur_time
= gethrtime();
663 if (rw
== KSTAT_WRITE
) {
664 if (!(ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
))
666 bcopy(buf
, ksp
->ks_data
, ksp
->ks_data_size
);
671 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
672 * number of kstat_named_t structures, followed by an optional
673 * string segment. The ks_data generally holds only the
674 * kstat_named_t structures. So we copy it first. The strings,
675 * if any, are copied below. For other kstat types, ks_data holds the
679 namedsz
= sizeof (kstat_named_t
) * ksp
->ks_ndata
;
680 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data_size
> namedsz
)
681 bcopy(ksp
->ks_data
, buf
, namedsz
);
683 bcopy(ksp
->ks_data
, buf
, ksp
->ks_data_size
);
686 * Apply kstat type-specific data massaging
688 switch (ksp
->ks_type
) {
692 * Normalize time units and deal with incomplete transactions
694 kiop
= (kstat_io_t
*)buf
;
696 scalehrtime(&kiop
->wtime
);
697 scalehrtime(&kiop
->wlentime
);
698 scalehrtime(&kiop
->wlastupdate
);
699 scalehrtime(&kiop
->rtime
);
700 scalehrtime(&kiop
->rlentime
);
701 scalehrtime(&kiop
->rlastupdate
);
703 if (kiop
->wcnt
!= 0) {
704 /* like kstat_waitq_exit */
705 hrtime_t wfix
= cur_time
- kiop
->wlastupdate
;
706 kiop
->wlastupdate
= cur_time
;
707 kiop
->wlentime
+= kiop
->wcnt
* wfix
;
711 if (kiop
->rcnt
!= 0) {
712 /* like kstat_runq_exit */
713 hrtime_t rfix
= cur_time
- kiop
->rlastupdate
;
714 kiop
->rlastupdate
= cur_time
;
715 kiop
->rlentime
+= kiop
->rcnt
* rfix
;
720 case KSTAT_TYPE_NAMED
:
722 * Massage any long strings in at the end of the buffer
724 if (ksp
->ks_data_size
> namedsz
) {
726 kstat_named_t
*knp
= buf
;
727 char *dst
= (char *)(knp
+ ksp
->ks_ndata
);
729 * Copy strings and update pointers
731 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
732 if (knp
->data_type
== KSTAT_DATA_STRING
&&
733 KSTAT_NAMED_STR_PTR(knp
) != NULL
) {
734 bcopy(KSTAT_NAMED_STR_PTR(knp
), dst
,
735 KSTAT_NAMED_STR_BUFLEN(knp
));
736 KSTAT_NAMED_STR_PTR(knp
) = dst
;
737 dst
+= KSTAT_NAMED_STR_BUFLEN(knp
);
740 ASSERT(dst
<= ((char *)buf
+ ksp
->ks_data_size
));
748 header_kstat_update(kstat_t
*header_ksp
, int rw
)
752 avl_tree_t
*t
= &kstat_avl_bykid
;
755 if (rw
== KSTAT_WRITE
)
758 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
760 zoneid
= getzoneid();
761 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
762 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
763 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
767 header_ksp
->ks_ndata
= nkstats
;
768 header_ksp
->ks_data_size
= nkstats
* sizeof (kstat_t
);
773 * Copy out the data section of kstat 0, which consists of the list
774 * of all kstat headers. By specification, these headers must be
775 * copied out in order of increasing KID.
778 header_kstat_snapshot(kstat_t
*header_ksp
, void *buf
, int rw
)
781 avl_tree_t
*t
= &kstat_avl_bykid
;
784 header_ksp
->ks_snaptime
= gethrtime();
786 if (rw
== KSTAT_WRITE
)
789 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
791 zoneid
= getzoneid();
792 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
793 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
794 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
795 bcopy(&e
->e_ks
, buf
, sizeof (kstat_t
));
796 buf
= (char *)buf
+ sizeof (kstat_t
);
805 system_misc_kstat_update(kstat_t
*ksp
, int rw
)
808 int *loadavgp
= &avenrun
[0];
809 time_t zone_boot_time
;
811 hrtime_t zone_hrtime
;
814 if (rw
== KSTAT_WRITE
)
817 if (!INGLOBALZONE(curproc
)) {
819 * Here we grab cpu_lock which is OK as long as no-one in the
820 * future attempts to lookup this particular kstat
821 * (unix:0:system_misc) while holding cpu_lock.
823 mutex_enter(&cpu_lock
);
824 if (pool_pset_enabled()) {
825 myncpus
= zone_ncpus_get(curproc
->p_zone
);
828 mutex_exit(&cpu_lock
);
829 loadavgp
= &curproc
->p_zone
->zone_avenrun
[0];
832 if (INGLOBALZONE(curproc
)) {
833 zone_boot_time
= boot_time
;
834 zone_lbolt
= ddi_get_lbolt();
837 zone_boot_time
= curproc
->p_zone
->zone_boot_time
;
839 zone_hrtime
= gethrtime();
840 zone_lbolt
= (clock_t)(NSEC_TO_TICK(zone_hrtime
) -
841 NSEC_TO_TICK(curproc
->p_zone
->zone_zsched
->p_mstart
));
842 mutex_enter(&curproc
->p_zone
->zone_nlwps_lock
);
843 zone_nproc
= curproc
->p_zone
->zone_nprocs
;
844 mutex_exit(&curproc
->p_zone
->zone_nlwps_lock
);
847 system_misc_kstat
.ncpus
.value
.ui32
= (uint32_t)myncpus
;
848 system_misc_kstat
.lbolt
.value
.ui32
= (uint32_t)zone_lbolt
;
849 system_misc_kstat
.deficit
.value
.ui32
= (uint32_t)deficit
;
850 system_misc_kstat
.clk_intr
.value
.ui32
= (uint32_t)zone_lbolt
;
851 system_misc_kstat
.vac
.value
.ui32
= (uint32_t)vac
;
852 system_misc_kstat
.nproc
.value
.ui32
= (uint32_t)zone_nproc
;
853 system_misc_kstat
.avenrun_1min
.value
.ui32
= (uint32_t)loadavgp
[0];
854 system_misc_kstat
.avenrun_5min
.value
.ui32
= (uint32_t)loadavgp
[1];
855 system_misc_kstat
.avenrun_15min
.value
.ui32
= (uint32_t)loadavgp
[2];
856 system_misc_kstat
.boot_time
.value
.ui32
= (uint32_t)
862 extern caddr_t econtig32
;
864 extern caddr_t econtig
;
869 system_pages_kstat_update(kstat_t
*ksp
, int rw
)
871 kobj_stat_t kobj_stat
;
873 if (rw
== KSTAT_WRITE
) {
877 kobj_stat_get(&kobj_stat
);
878 system_pages_kstat
.physmem
.value
.ul
= (ulong_t
)physmem
;
879 system_pages_kstat
.nalloc
.value
.ul
= kobj_stat
.nalloc
;
880 system_pages_kstat
.nfree
.value
.ul
= kobj_stat
.nfree
;
881 system_pages_kstat
.nalloc_calls
.value
.ul
= kobj_stat
.nalloc_calls
;
882 system_pages_kstat
.nfree_calls
.value
.ul
= kobj_stat
.nfree_calls
;
883 system_pages_kstat
.kernelbase
.value
.ul
= (ulong_t
)KERNELBASE
;
887 * kstat should REALLY be modified to also report kmem64_base and
888 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
889 * [ kernelbase .. econtig ] no longer is truly reflective of the
890 * kernel's vallocs...
892 system_pages_kstat
.econtig
.value
.ul
= (ulong_t
)econtig32
;
894 system_pages_kstat
.econtig
.value
.ul
= (ulong_t
)econtig
;
897 system_pages_kstat
.freemem
.value
.ul
= (ulong_t
)freemem
;
898 system_pages_kstat
.availrmem
.value
.ul
= (ulong_t
)availrmem
;
899 system_pages_kstat
.lotsfree
.value
.ul
= (ulong_t
)lotsfree
;
900 system_pages_kstat
.desfree
.value
.ul
= (ulong_t
)desfree
;
901 system_pages_kstat
.minfree
.value
.ul
= (ulong_t
)minfree
;
902 system_pages_kstat
.fastscan
.value
.ul
= (ulong_t
)fastscan
;
903 system_pages_kstat
.slowscan
.value
.ul
= (ulong_t
)slowscan
;
904 system_pages_kstat
.nscan
.value
.ul
= (ulong_t
)nscan
;
905 system_pages_kstat
.desscan
.value
.ul
= (ulong_t
)desscan
;
906 system_pages_kstat
.pagesfree
.value
.ul
= (ulong_t
)freemem
;
907 system_pages_kstat
.pageslocked
.value
.ul
= (ulong_t
)(availrmem_initial
-
909 system_pages_kstat
.pagestotal
.value
.ul
= (ulong_t
)total_pages
;
911 * pp_kernel represents total pages used by the kernel since the
912 * startup. This formula takes into account the boottime kernel
913 * footprint and also considers the availrmem changes because of
914 * user explicit page locking.
916 system_pages_kstat
.pp_kernel
.value
.ul
= (ulong_t
)(physinstalled
-
917 obp_pages
- availrmem
- k_anoninfo
.ani_mem_resv
-
918 anon_segkp_pages_locked
- pages_locked
-
919 pages_claimed
- pages_useclaim
);
925 kstat_create(const char *ks_module
, int ks_instance
, const char *ks_name
,
926 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
)
928 return (kstat_create_zone(ks_module
, ks_instance
, ks_name
, ks_class
,
929 ks_type
, ks_ndata
, ks_flags
, ALL_ZONES
));
933 * Allocate and initialize a kstat structure. Or, if a dormant kstat with
934 * the specified name exists, reactivate it. Returns a pointer to the kstat
935 * on success, NULL on failure. The kstat will not be visible to the
936 * kstat driver until kstat_install().
939 kstat_create_zone(const char *ks_module
, int ks_instance
, const char *ks_name
,
940 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
,
947 char namebuf
[KSTAT_STRLEN
+ 16];
949 if (avl_numnodes(&kstat_avl_bykid
) == 0) {
950 avl_create(&kstat_avl_bykid
, kstat_compare_bykid
,
951 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_bykid
));
953 avl_create(&kstat_avl_byname
, kstat_compare_byname
,
954 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_byname
));
958 * If ks_name == NULL, set the ks_name to <module><instance>.
960 if (ks_name
== NULL
) {
961 char buf
[KSTAT_STRLEN
];
962 kstat_set_string(buf
, ks_module
);
963 (void) sprintf(namebuf
, "%s%d", buf
, ks_instance
);
968 * Make sure it's a valid kstat data type
970 if (ks_type
>= KSTAT_NUM_TYPES
) {
971 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
972 "invalid kstat type %d",
973 ks_module
, ks_instance
, ks_name
, ks_type
);
978 * Don't allow persistent virtual kstats -- it makes no sense.
979 * ks_data points to garbage when the client goes away.
981 if ((ks_flags
& KSTAT_FLAG_PERSISTENT
) &&
982 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
983 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
984 "cannot create persistent virtual kstat",
985 ks_module
, ks_instance
, ks_name
);
990 * Don't allow variable-size physical kstats, since the framework's
991 * memory allocation for physical kstat data is fixed at creation time.
993 if ((ks_flags
& KSTAT_FLAG_VAR_SIZE
) &&
994 !(ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
995 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
996 "cannot create variable-size physical kstat",
997 ks_module
, ks_instance
, ks_name
);
1002 * Make sure the number of data fields is within legal range
1004 if (ks_ndata
< kstat_data_type
[ks_type
].min_ndata
||
1005 ks_ndata
> kstat_data_type
[ks_type
].max_ndata
) {
1006 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1007 "ks_ndata=%d out of range [%d, %d]",
1008 ks_module
, ks_instance
, ks_name
, (int)ks_ndata
,
1009 kstat_data_type
[ks_type
].min_ndata
,
1010 kstat_data_type
[ks_type
].max_ndata
);
1014 ks_data_size
= kstat_data_type
[ks_type
].size
* ks_ndata
;
1017 * If the named kstat already exists and is dormant, reactivate it.
1019 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1021 if (!(ksp
->ks_flags
& KSTAT_FLAG_DORMANT
)) {
1023 * The named kstat exists but is not dormant --
1024 * this is a kstat namespace collision.
1028 "kstat_create('%s', %d, '%s'): namespace collision",
1029 ks_module
, ks_instance
, ks_name
);
1032 if ((strcmp(ksp
->ks_class
, ks_class
) != 0) ||
1033 (ksp
->ks_type
!= ks_type
) ||
1034 (ksp
->ks_ndata
!= ks_ndata
) ||
1035 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
1037 * The name is the same, but the other key parameters
1038 * differ from those of the dormant kstat -- bogus.
1041 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1042 "invalid reactivation of dormant kstat",
1043 ks_module
, ks_instance
, ks_name
);
1047 * Return dormant kstat pointer to caller. As usual,
1048 * the kstat is marked invalid until kstat_install().
1050 ksp
->ks_flags
|= KSTAT_FLAG_INVALID
;
1056 * Allocate memory for the new kstat header and, if this is a physical
1057 * kstat, the data section.
1059 e
= kstat_alloc(ks_flags
& KSTAT_FLAG_VIRTUAL
? 0 : ks_data_size
);
1061 cmn_err(CE_NOTE
, "kstat_create('%s', %d, '%s'): "
1062 "insufficient kernel memory",
1063 ks_module
, ks_instance
, ks_name
);
1068 * Initialize as many fields as we can. The caller may reset
1069 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1070 * Creators of virtual kstats may also reset ks_data. It is
1071 * also up to the caller to initialize the kstat data section,
1072 * if necessary. All initialization must be complete before
1073 * calling kstat_install().
1075 e
->e_zone
.zoneid
= ks_zoneid
;
1076 e
->e_zone
.next
= NULL
;
1079 ksp
->ks_crtime
= gethrtime();
1080 kstat_set_string(ksp
->ks_module
, ks_module
);
1081 ksp
->ks_instance
= ks_instance
;
1082 kstat_set_string(ksp
->ks_name
, ks_name
);
1083 ksp
->ks_type
= ks_type
;
1084 kstat_set_string(ksp
->ks_class
, ks_class
);
1085 ksp
->ks_flags
= ks_flags
| KSTAT_FLAG_INVALID
;
1086 if (ks_flags
& KSTAT_FLAG_VIRTUAL
)
1087 ksp
->ks_data
= NULL
;
1089 ksp
->ks_data
= (void *)(e
+ 1);
1090 ksp
->ks_ndata
= ks_ndata
;
1091 ksp
->ks_data_size
= ks_data_size
;
1092 ksp
->ks_snaptime
= ksp
->ks_crtime
;
1093 ksp
->ks_update
= default_kstat_update
;
1094 ksp
->ks_private
= NULL
;
1095 ksp
->ks_snapshot
= default_kstat_snapshot
;
1096 ksp
->ks_lock
= NULL
;
1098 mutex_enter(&kstat_chain_lock
);
1101 * Add our kstat to the AVL trees.
1103 if (avl_find(&kstat_avl_byname
, e
, &where
) != NULL
) {
1104 mutex_exit(&kstat_chain_lock
);
1106 "kstat_create('%s', %d, '%s'): namespace collision",
1107 ks_module
, ks_instance
, ks_name
);
1111 avl_insert(&kstat_avl_byname
, e
, where
);
1114 * Loop around until we find an unused KID.
1117 ksp
->ks_kid
= kstat_chain_id
++;
1118 } while (avl_find(&kstat_avl_bykid
, e
, &where
) != NULL
);
1119 avl_insert(&kstat_avl_bykid
, e
, where
);
1121 mutex_exit(&kstat_chain_lock
);
1127 * Activate a fully initialized kstat and make it visible to /dev/kstat.
1130 kstat_install(kstat_t
*ksp
)
1132 zoneid_t zoneid
= ((ekstat_t
*)ksp
)->e_zone
.zoneid
;
1135 * If this is a variable-size kstat, it MUST provide kstat data locking
1136 * to prevent data-size races with kstat readers.
1138 if ((ksp
->ks_flags
& KSTAT_FLAG_VAR_SIZE
) && ksp
->ks_lock
== NULL
) {
1139 panic("kstat_install('%s', %d, '%s'): "
1140 "cannot create variable-size kstat without data lock",
1141 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1144 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1145 cmn_err(CE_WARN
, "kstat_install(%p): does not exist",
1150 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data
!= NULL
) {
1151 int has_long_strings
= 0;
1153 kstat_named_t
*knp
= KSTAT_NAMED_PTR(ksp
);
1155 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
1156 if (knp
->data_type
== KSTAT_DATA_STRING
) {
1157 has_long_strings
= 1;
1162 * It is an error for a named kstat with fields of
1163 * KSTAT_DATA_STRING to be non-virtual.
1165 if (has_long_strings
&& !(ksp
->ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
1166 panic("kstat_install('%s', %d, '%s'): "
1167 "named kstat containing KSTAT_DATA_STRING "
1169 ksp
->ks_module
, ksp
->ks_instance
,
1173 * The default snapshot routine does not handle KSTAT_WRITE
1176 if (has_long_strings
&& (ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
) &&
1177 (ksp
->ks_snapshot
== default_kstat_snapshot
)) {
1178 panic("kstat_install('%s', %d, '%s'): "
1179 "named kstat containing KSTAT_DATA_STRING "
1180 "is writable but uses default snapshot routine",
1181 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1185 if (ksp
->ks_flags
& KSTAT_FLAG_DORMANT
) {
1188 * We are reactivating a dormant kstat. Initialize the
1189 * caller's underlying data to the value it had when the
1190 * kstat went dormant, and mark the kstat as active.
1191 * Grab the provider's kstat lock if it's not already held.
1193 kmutex_t
*lp
= ksp
->ks_lock
;
1194 if (lp
!= NULL
&& MUTEX_NOT_HELD(lp
)) {
1196 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1199 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1201 ksp
->ks_flags
&= ~KSTAT_FLAG_DORMANT
;
1205 * Now that the kstat is active, make it visible to the kstat driver.
1206 * When copying out kstats the count is determined in
1207 * header_kstat_update() and actually copied into kbuf in
1208 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1209 * calls to ensure that this list doesn't change. Thus, we need to
1210 * also take the lock to ensure that the we don't copy the new kstat
1211 * in the 2nd pass and overrun the buf.
1213 mutex_enter(&kstat_chain_lock
);
1214 ksp
->ks_flags
&= ~KSTAT_FLAG_INVALID
;
1215 mutex_exit(&kstat_chain_lock
);
1220 * Remove a kstat from the system. Or, if it's a persistent kstat,
1221 * just update the data and mark it as dormant.
1224 kstat_delete(kstat_t
*ksp
)
1227 ekstat_t
*e
= (ekstat_t
*)ksp
;
1231 ASSERT(ksp
!= NULL
);
1236 zoneid
= e
->e_zone
.zoneid
;
1240 if (lp
!= NULL
&& MUTEX_HELD(lp
)) {
1241 panic("kstat_delete(%p): caller holds data lock %p",
1242 (void *)ksp
, (void *)lp
);
1245 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1246 cmn_err(CE_WARN
, "kstat_delete(%p): does not exist",
1251 if (ksp
->ks_flags
& KSTAT_FLAG_PERSISTENT
) {
1253 * Update the data one last time, so that all activity
1254 * prior to going dormant has been accounted for.
1257 (void) KSTAT_UPDATE(ksp
, KSTAT_READ
);
1261 * Mark the kstat as dormant and restore caller-modifiable
1262 * fields to default values, so the kstat is readable during
1263 * the dormant phase.
1265 ksp
->ks_flags
|= KSTAT_FLAG_DORMANT
;
1266 ksp
->ks_lock
= NULL
;
1267 ksp
->ks_update
= default_kstat_update
;
1268 ksp
->ks_private
= NULL
;
1269 ksp
->ks_snapshot
= default_kstat_snapshot
;
1275 * Remove the kstat from the framework's AVL trees,
1276 * free the allocated memory, and increment kstat_chain_id so
1277 * /dev/kstat clients can detect the event.
1279 mutex_enter(&kstat_chain_lock
);
1280 avl_remove(&kstat_avl_bykid
, e
);
1281 avl_remove(&kstat_avl_byname
, e
);
1283 mutex_exit(&kstat_chain_lock
);
1285 kz
= e
->e_zone
.next
;
1286 while (kz
!= NULL
) {
1287 kstat_zone_t
*t
= kz
;
1290 kmem_free(t
, sizeof (*t
));
1297 kstat_delete_byname_zone(const char *ks_module
, int ks_instance
,
1298 const char *ks_name
, zoneid_t ks_zoneid
)
1302 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1310 kstat_delete_byname(const char *ks_module
, int ks_instance
, const char *ks_name
)
1312 kstat_delete_byname_zone(ks_module
, ks_instance
, ks_name
, ALL_ZONES
);
1316 * The sparc V9 versions of these routines can be much cheaper than
1317 * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1318 * For simplicity, however, we always feed the C versions to lint.
1320 #if !defined(__sparc) || defined(lint) || defined(__lint)
1323 kstat_waitq_enter(kstat_io_t
*kiop
)
1325 hrtime_t
new, delta
;
1328 new = gethrtime_unscaled();
1329 delta
= new - kiop
->wlastupdate
;
1330 kiop
->wlastupdate
= new;
1331 wcnt
= kiop
->wcnt
++;
1333 kiop
->wlentime
+= delta
* wcnt
;
1334 kiop
->wtime
+= delta
;
1339 kstat_waitq_exit(kstat_io_t
*kiop
)
1341 hrtime_t
new, delta
;
1344 new = gethrtime_unscaled();
1345 delta
= new - kiop
->wlastupdate
;
1346 kiop
->wlastupdate
= new;
1347 wcnt
= kiop
->wcnt
--;
1348 ASSERT((int)wcnt
> 0);
1349 kiop
->wlentime
+= delta
* wcnt
;
1350 kiop
->wtime
+= delta
;
1354 kstat_runq_enter(kstat_io_t
*kiop
)
1356 hrtime_t
new, delta
;
1359 new = gethrtime_unscaled();
1360 delta
= new - kiop
->rlastupdate
;
1361 kiop
->rlastupdate
= new;
1362 rcnt
= kiop
->rcnt
++;
1364 kiop
->rlentime
+= delta
* rcnt
;
1365 kiop
->rtime
+= delta
;
1370 kstat_runq_exit(kstat_io_t
*kiop
)
1372 hrtime_t
new, delta
;
1375 new = gethrtime_unscaled();
1376 delta
= new - kiop
->rlastupdate
;
1377 kiop
->rlastupdate
= new;
1378 rcnt
= kiop
->rcnt
--;
1379 ASSERT((int)rcnt
> 0);
1380 kiop
->rlentime
+= delta
* rcnt
;
1381 kiop
->rtime
+= delta
;
1385 kstat_waitq_to_runq(kstat_io_t
*kiop
)
1387 hrtime_t
new, delta
;
1390 new = gethrtime_unscaled();
1392 delta
= new - kiop
->wlastupdate
;
1393 kiop
->wlastupdate
= new;
1394 wcnt
= kiop
->wcnt
--;
1395 ASSERT((int)wcnt
> 0);
1396 kiop
->wlentime
+= delta
* wcnt
;
1397 kiop
->wtime
+= delta
;
1399 delta
= new - kiop
->rlastupdate
;
1400 kiop
->rlastupdate
= new;
1401 rcnt
= kiop
->rcnt
++;
1403 kiop
->rlentime
+= delta
* rcnt
;
1404 kiop
->rtime
+= delta
;
1409 kstat_runq_back_to_waitq(kstat_io_t
*kiop
)
1411 hrtime_t
new, delta
;
1414 new = gethrtime_unscaled();
1416 delta
= new - kiop
->rlastupdate
;
1417 kiop
->rlastupdate
= new;
1418 rcnt
= kiop
->rcnt
--;
1419 ASSERT((int)rcnt
> 0);
1420 kiop
->rlentime
+= delta
* rcnt
;
1421 kiop
->rtime
+= delta
;
1423 delta
= new - kiop
->wlastupdate
;
1424 kiop
->wlastupdate
= new;
1425 wcnt
= kiop
->wcnt
++;
1427 kiop
->wlentime
+= delta
* wcnt
;
1428 kiop
->wtime
+= delta
;
1435 kstat_timer_start(kstat_timer_t
*ktp
)
1437 ktp
->start_time
= gethrtime();
1441 kstat_timer_stop(kstat_timer_t
*ktp
)
1444 u_longlong_t num_events
;
1446 ktp
->stop_time
= etime
= gethrtime();
1447 etime
-= ktp
->start_time
;
1448 num_events
= ktp
->num_events
;
1449 if (etime
< ktp
->min_time
|| num_events
== 0)
1450 ktp
->min_time
= etime
;
1451 if (etime
> ktp
->max_time
)
1452 ktp
->max_time
= etime
;
1453 ktp
->elapsed_time
+= etime
;
1454 ktp
->num_events
= num_events
+ 1;