4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014, Joyent, Inc. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28 * Kernel statistics framework
31 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/sysinfo.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
50 #include <sys/debug.h>
53 #include <sys/pool_pset.h>
54 #include <sys/cpupart.h>
56 #include <sys/loadavg.h>
59 #include <vm/seg_kmem.h>
62 * Global lock to protect the AVL trees and kstat_chain_id.
64 static kmutex_t kstat_chain_lock
;
/*
 * Every install/delete kstat bumps kstat_chain_id.  This is used by:
 *
 * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
 *
 * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
 *	/dev/kstat uses the KID as a cookie for kstat lookups.
 *
 * We reserve the first two IDs because some kstats are created before
 * the well-known ones (kstat_headers = 0, kstat_types = 1).
 *
 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
 * into a particular kstat, which is logically equivalent to a kstat being
 * installed or deleted.
 */
82 kid_t kstat_chain_id
= 2;
85 * As far as zones are concerned, there are 3 types of kstat:
87 * 1) Those which have a well-known name, and which should return per-zone data
88 * depending on which zone is doing the kstat_read(). sockfs:0:sock_unix_list
89 * is an example of this type of kstat.
91 * 2) Those which should only be exported to a particular list of zones.
92 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
93 * able to see NFS mounts associated with zone B, while we want the
94 * global zone to be able to see all mounts on the system.
96 * 3) Those that can be exported to all zones. Most system-related
97 * kstats fall within this category.
99 * An ekstat_t thus contains a list of kstats that the zone is to be
100 * exported to. The lookup of a name:instance:module thus translates to a
101 * lookup of name:instance:module:myzone; if the kstat is not exported
102 * to all zones, and does not have the caller's zoneid explicitly
103 * enumerated in the list of zones to be exported to, it is the same as
104 * if the kstat didn't exist.
106 * Writing to kstats is currently disallowed from within a non-global
107 * zone, although this restriction could be removed in the future.
109 typedef struct kstat_zone
{
111 struct kstat_zone
*next
;
115 * Extended kstat structure -- for internal use only.
117 typedef struct ekstat
{
118 kstat_t e_ks
; /* the kstat itself */
119 size_t e_size
; /* total allocation size */
120 kthread_t
*e_owner
; /* thread holding this kstat */
121 kcondvar_t e_cv
; /* wait for owner == NULL */
122 avl_node_t e_avl_bykid
; /* AVL tree to sort by KID */
123 avl_node_t e_avl_byname
; /* AVL tree to sort by name */
124 kstat_zone_t e_zone
; /* zone to export stats to */
127 static uint64_t kstat_initial
[8192];
128 static void *kstat_initial_ptr
= kstat_initial
;
129 static size_t kstat_initial_avail
= sizeof (kstat_initial
);
130 static vmem_t
*kstat_arena
;
132 #define KSTAT_ALIGN (sizeof (uint64_t))
134 static avl_tree_t kstat_avl_bykid
;
135 static avl_tree_t kstat_avl_byname
;
138 * Various pointers we need to create kstats at boot time in kstat_init()
140 extern kstat_named_t
*segmapcnt_ptr
;
141 extern uint_t segmapcnt_ndata
;
142 extern int segmap_kstat_update(kstat_t
*, int);
143 extern kstat_named_t
*biostats_ptr
;
144 extern uint_t biostats_ndata
;
145 extern kstat_named_t
*pollstats_ptr
;
146 extern uint_t pollstats_ndata
;
150 extern time_t boot_time
;
151 extern sysinfo_t sysinfo
;
152 extern vminfo_t vminfo
;
157 kstat_named_t deficit
;
158 kstat_named_t clk_intr
;
161 kstat_named_t avenrun_1min
;
162 kstat_named_t avenrun_5min
;
163 kstat_named_t avenrun_15min
;
164 kstat_named_t boot_time
;
165 kstat_named_t nsec_per_tick
;
166 } system_misc_kstat
= {
167 { "ncpus", KSTAT_DATA_UINT32
},
168 { "lbolt", KSTAT_DATA_UINT32
},
169 { "deficit", KSTAT_DATA_UINT32
},
170 { "clk_intr", KSTAT_DATA_UINT32
},
171 { "vac", KSTAT_DATA_UINT32
},
172 { "nproc", KSTAT_DATA_UINT32
},
173 { "avenrun_1min", KSTAT_DATA_UINT32
},
174 { "avenrun_5min", KSTAT_DATA_UINT32
},
175 { "avenrun_15min", KSTAT_DATA_UINT32
},
176 { "boot_time", KSTAT_DATA_UINT32
},
177 { "nsec_per_tick", KSTAT_DATA_UINT32
},
181 kstat_named_t physmem
;
182 kstat_named_t nalloc
;
184 kstat_named_t nalloc_calls
;
185 kstat_named_t nfree_calls
;
186 kstat_named_t kernelbase
;
187 kstat_named_t econtig
;
188 kstat_named_t freemem
;
189 kstat_named_t availrmem
;
190 kstat_named_t lotsfree
;
191 kstat_named_t desfree
;
192 kstat_named_t minfree
;
193 kstat_named_t fastscan
;
194 kstat_named_t slowscan
;
196 kstat_named_t desscan
;
197 kstat_named_t pp_kernel
;
198 kstat_named_t pagesfree
;
199 kstat_named_t pageslocked
;
200 kstat_named_t pagestotal
;
201 } system_pages_kstat
= {
202 { "physmem", KSTAT_DATA_ULONG
},
203 { "nalloc", KSTAT_DATA_ULONG
},
204 { "nfree", KSTAT_DATA_ULONG
},
205 { "nalloc_calls", KSTAT_DATA_ULONG
},
206 { "nfree_calls", KSTAT_DATA_ULONG
},
207 { "kernelbase", KSTAT_DATA_ULONG
},
208 { "econtig", KSTAT_DATA_ULONG
},
209 { "freemem", KSTAT_DATA_ULONG
},
210 { "availrmem", KSTAT_DATA_ULONG
},
211 { "lotsfree", KSTAT_DATA_ULONG
},
212 { "desfree", KSTAT_DATA_ULONG
},
213 { "minfree", KSTAT_DATA_ULONG
},
214 { "fastscan", KSTAT_DATA_ULONG
},
215 { "slowscan", KSTAT_DATA_ULONG
},
216 { "nscan", KSTAT_DATA_ULONG
},
217 { "desscan", KSTAT_DATA_ULONG
},
218 { "pp_kernel", KSTAT_DATA_ULONG
},
219 { "pagesfree", KSTAT_DATA_ULONG
},
220 { "pageslocked", KSTAT_DATA_ULONG
},
221 { "pagestotal", KSTAT_DATA_ULONG
},
224 static int header_kstat_update(kstat_t
*, int);
225 static int header_kstat_snapshot(kstat_t
*, void *, int);
226 static int system_misc_kstat_update(kstat_t
*, int);
227 static int system_pages_kstat_update(kstat_t
*, int);
230 char name
[KSTAT_STRLEN
];
234 } kstat_data_type
[KSTAT_NUM_TYPES
] = {
235 { "raw", 1, 0, INT_MAX
},
236 { "name=value", sizeof (kstat_named_t
), 0, INT_MAX
},
237 { "interrupt", sizeof (kstat_intr_t
), 1, 1 },
238 { "i/o", sizeof (kstat_io_t
), 1, 1 },
239 { "event_timer", sizeof (kstat_timer_t
), 0, INT_MAX
},
243 kstat_zone_find(kstat_t
*k
, zoneid_t zoneid
)
245 ekstat_t
*e
= (ekstat_t
*)k
;
248 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
249 for (kz
= &e
->e_zone
; kz
!= NULL
; kz
= kz
->next
) {
250 if (zoneid
== ALL_ZONES
|| kz
->zoneid
== ALL_ZONES
)
252 if (zoneid
== kz
->zoneid
)
259 kstat_zone_remove(kstat_t
*k
, zoneid_t zoneid
)
261 ekstat_t
*e
= (ekstat_t
*)k
;
262 kstat_zone_t
*kz
, *t
= NULL
;
264 mutex_enter(&kstat_chain_lock
);
265 if (zoneid
== e
->e_zone
.zoneid
) {
268 e
->e_zone
.zoneid
= kz
->zoneid
;
269 e
->e_zone
.next
= kz
->next
;
272 for (kz
= &e
->e_zone
; kz
->next
!= NULL
; kz
= kz
->next
) {
273 if (kz
->next
->zoneid
== zoneid
) {
279 ASSERT(t
!= NULL
); /* we removed something */
283 mutex_exit(&kstat_chain_lock
);
284 kmem_free(kz
, sizeof (*kz
));
288 kstat_zone_add(kstat_t
*k
, zoneid_t zoneid
)
290 ekstat_t
*e
= (ekstat_t
*)k
;
293 kz
= kmem_alloc(sizeof (*kz
), KM_NOSLEEP
);
296 mutex_enter(&kstat_chain_lock
);
298 kz
->next
= e
->e_zone
.next
;
301 mutex_exit(&kstat_chain_lock
);
/*
 * Compare the list of zones for the given kstats, returning 0 if they match
 * (i.e., one list contains ALL_ZONES or both lists contain the same zoneid).
 * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 * two lists always has one element, and this is an O(n) operation rather than
 * O(n^2).
 */
312 kstat_zone_compare(ekstat_t
*e1
, ekstat_t
*e2
)
314 kstat_zone_t
*kz1
, *kz2
;
316 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
317 for (kz1
= &e1
->e_zone
; kz1
!= NULL
; kz1
= kz1
->next
) {
318 for (kz2
= &e2
->e_zone
; kz2
!= NULL
; kz2
= kz2
->next
) {
319 if (kz1
->zoneid
== ALL_ZONES
||
320 kz2
->zoneid
== ALL_ZONES
)
322 if (kz1
->zoneid
== kz2
->zoneid
)
326 return (e1
->e_zone
.zoneid
< e2
->e_zone
.zoneid
? -1 : 1);
330 * Support for keeping kstats sorted in AVL trees for fast lookups.
333 kstat_compare_bykid(const void *a1
, const void *a2
)
335 const kstat_t
*k1
= a1
;
336 const kstat_t
*k2
= a2
;
338 if (k1
->ks_kid
< k2
->ks_kid
)
340 if (k1
->ks_kid
> k2
->ks_kid
)
342 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
346 kstat_compare_byname(const void *a1
, const void *a2
)
348 const kstat_t
*k1
= a1
;
349 const kstat_t
*k2
= a2
;
352 s
= strcmp(k1
->ks_module
, k2
->ks_module
);
358 if (k1
->ks_instance
< k2
->ks_instance
)
360 if (k1
->ks_instance
> k2
->ks_instance
)
363 s
= strcmp(k1
->ks_name
, k2
->ks_name
);
369 return (kstat_zone_compare((ekstat_t
*)k1
, (ekstat_t
*)k2
));
373 kstat_hold(avl_tree_t
*t
, ekstat_t
*template)
378 mutex_enter(&kstat_chain_lock
);
380 ksp
= avl_find(t
, template, NULL
);
384 if (e
->e_owner
== NULL
) {
385 e
->e_owner
= curthread
;
388 cv_wait(&e
->e_cv
, &kstat_chain_lock
);
390 mutex_exit(&kstat_chain_lock
);
395 kstat_rele(kstat_t
*ksp
)
397 ekstat_t
*e
= (ekstat_t
*)ksp
;
399 mutex_enter(&kstat_chain_lock
);
400 ASSERT(e
->e_owner
== curthread
);
402 cv_broadcast(&e
->e_cv
);
403 mutex_exit(&kstat_chain_lock
);
407 kstat_hold_bykid(kid_t kid
, zoneid_t zoneid
)
412 e
.e_zone
.zoneid
= zoneid
;
413 e
.e_zone
.next
= NULL
;
415 return (kstat_hold(&kstat_avl_bykid
, &e
));
419 kstat_hold_byname(const char *ks_module
, int ks_instance
, const char *ks_name
,
424 kstat_set_string(e
.e_ks
.ks_module
, ks_module
);
425 e
.e_ks
.ks_instance
= ks_instance
;
426 kstat_set_string(e
.e_ks
.ks_name
, ks_name
);
427 e
.e_zone
.zoneid
= ks_zoneid
;
428 e
.e_zone
.next
= NULL
;
429 return (kstat_hold(&kstat_avl_byname
, &e
));
433 kstat_alloc(size_t size
)
437 size
= P2ROUNDUP(sizeof (ekstat_t
) + size
, KSTAT_ALIGN
);
439 if (kstat_arena
== NULL
) {
440 if (size
<= kstat_initial_avail
) {
441 e
= kstat_initial_ptr
;
442 kstat_initial_ptr
= (char *)kstat_initial_ptr
+ size
;
443 kstat_initial_avail
-= size
;
446 e
= vmem_alloc(kstat_arena
, size
, VM_NOSLEEP
);
452 cv_init(&e
->e_cv
, NULL
, CV_DEFAULT
, NULL
);
459 kstat_free(ekstat_t
*e
)
461 cv_destroy(&e
->e_cv
);
462 vmem_free(kstat_arena
, e
, e
->e_size
);
466 * Create various system kstats.
473 avl_tree_t
*t
= &kstat_avl_bykid
;
476 * Set up the kstat vmem arena.
478 kstat_arena
= vmem_create("kstat",
479 kstat_initial
, sizeof (kstat_initial
), KSTAT_ALIGN
,
480 segkmem_alloc
, segkmem_free
, heap_arena
, 0, VM_SLEEP
);
483 * Make initial kstats appear as though they were allocated.
485 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
))
486 (void) vmem_xalloc(kstat_arena
, e
->e_size
, KSTAT_ALIGN
,
487 0, 0, e
, (char *)e
+ e
->e_size
,
488 VM_NOSLEEP
| VM_BESTFIT
| VM_PANIC
);
491 * The mother of all kstats. The first kstat in the system, which
492 * always has KID 0, has the headers for all kstats (including itself)
493 * as its data. Thus, the kstat driver does not need any special
494 * interface to extract the kstat chain.
497 ksp
= kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW
,
498 0, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_VAR_SIZE
);
500 ksp
->ks_lock
= &kstat_chain_lock
;
501 ksp
->ks_update
= header_kstat_update
;
502 ksp
->ks_snapshot
= header_kstat_snapshot
;
505 panic("cannot create kstat 'kstat_headers'");
508 ksp
= kstat_create("unix", 0, "kstat_types", "kstat",
509 KSTAT_TYPE_NAMED
, KSTAT_NUM_TYPES
, 0);
512 kstat_named_t
*kn
= KSTAT_NAMED_PTR(ksp
);
514 for (i
= 0; i
< KSTAT_NUM_TYPES
; i
++) {
515 kstat_named_init(&kn
[i
], kstat_data_type
[i
].name
,
522 ksp
= kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW
,
523 sizeof (sysinfo_t
), KSTAT_FLAG_VIRTUAL
);
525 ksp
->ks_data
= (void *) &sysinfo
;
529 ksp
= kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW
,
530 sizeof (vminfo_t
), KSTAT_FLAG_VIRTUAL
);
532 ksp
->ks_data
= (void *) &vminfo
;
536 ksp
= kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED
,
537 segmapcnt_ndata
, KSTAT_FLAG_VIRTUAL
);
539 ksp
->ks_data
= (void *) segmapcnt_ptr
;
540 ksp
->ks_update
= segmap_kstat_update
;
544 ksp
= kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED
,
545 biostats_ndata
, KSTAT_FLAG_VIRTUAL
);
547 ksp
->ks_data
= (void *) biostats_ptr
;
551 ksp
= kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW
,
552 sizeof (struct var
), KSTAT_FLAG_VIRTUAL
);
554 ksp
->ks_data
= (void *) &v
;
558 ksp
= kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED
,
559 sizeof (system_misc_kstat
) / sizeof (kstat_named_t
),
562 ksp
->ks_data
= (void *) &system_misc_kstat
;
563 ksp
->ks_update
= system_misc_kstat_update
;
567 ksp
= kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED
,
568 sizeof (system_pages_kstat
) / sizeof (kstat_named_t
),
571 ksp
->ks_data
= (void *) &system_pages_kstat
;
572 ksp
->ks_update
= system_pages_kstat_update
;
576 ksp
= kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED
,
577 pollstats_ndata
, KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
);
580 ksp
->ks_data
= pollstats_ptr
;
586 * Caller of this should ensure that the string pointed by src
587 * doesn't change while kstat's lock is held. Not doing so defeats
588 * kstat's snapshot strategy as explained in <sys/kstat.h>
591 kstat_named_setstr(kstat_named_t
*knp
, const char *src
)
593 if (knp
->data_type
!= KSTAT_DATA_STRING
)
594 panic("kstat_named_setstr('%p', '%p'): "
595 "named kstat is not of type KSTAT_DATA_STRING",
596 (void *)knp
, (void *)src
);
598 KSTAT_NAMED_STR_PTR(knp
) = (char *)src
;
600 KSTAT_NAMED_STR_BUFLEN(knp
) = strlen(src
) + 1;
602 KSTAT_NAMED_STR_BUFLEN(knp
) = 0;
606 kstat_set_string(char *dst
, const char *src
)
608 bzero(dst
, KSTAT_STRLEN
);
609 (void) strncpy(dst
, src
, KSTAT_STRLEN
- 1);
613 kstat_named_init(kstat_named_t
*knp
, const char *name
, uchar_t data_type
)
615 kstat_set_string(knp
->name
, name
);
616 knp
->data_type
= data_type
;
618 if (data_type
== KSTAT_DATA_STRING
)
619 kstat_named_setstr(knp
, NULL
);
623 kstat_timer_init(kstat_timer_t
*ktp
, const char *name
)
625 kstat_set_string(ktp
->name
, name
);
630 default_kstat_update(kstat_t
*ksp
, int rw
)
637 * Named kstats with variable-length long strings have a standard
638 * way of determining how much space is needed to hold the snapshot:
640 if (ksp
->ks_data
!= NULL
&& ksp
->ks_type
== KSTAT_TYPE_NAMED
&&
641 (ksp
->ks_flags
& (KSTAT_FLAG_VAR_SIZE
| KSTAT_FLAG_LONGSTRINGS
))) {
644 * Add in the space required for the strings
646 knp
= KSTAT_NAMED_PTR(ksp
);
647 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
648 if (knp
->data_type
== KSTAT_DATA_STRING
)
649 len
+= KSTAT_NAMED_STR_BUFLEN(knp
);
652 ksp
->ks_ndata
* sizeof (kstat_named_t
) + len
;
658 default_kstat_snapshot(kstat_t
*ksp
, void *buf
, int rw
)
664 ksp
->ks_snaptime
= cur_time
= gethrtime();
666 if (rw
== KSTAT_WRITE
) {
667 if (!(ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
))
669 bcopy(buf
, ksp
->ks_data
, ksp
->ks_data_size
);
674 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
675 * number of kstat_named_t structures, followed by an optional
676 * string segment. The ks_data generally holds only the
677 * kstat_named_t structures. So we copy it first. The strings,
678 * if any, are copied below. For other kstat types, ks_data holds the
682 namedsz
= sizeof (kstat_named_t
) * ksp
->ks_ndata
;
683 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data_size
> namedsz
)
684 bcopy(ksp
->ks_data
, buf
, namedsz
);
686 bcopy(ksp
->ks_data
, buf
, ksp
->ks_data_size
);
689 * Apply kstat type-specific data massaging
691 switch (ksp
->ks_type
) {
695 * Normalize time units and deal with incomplete transactions
697 kiop
= (kstat_io_t
*)buf
;
699 scalehrtime(&kiop
->wtime
);
700 scalehrtime(&kiop
->wlentime
);
701 scalehrtime(&kiop
->wlastupdate
);
702 scalehrtime(&kiop
->rtime
);
703 scalehrtime(&kiop
->rlentime
);
704 scalehrtime(&kiop
->rlastupdate
);
706 if (kiop
->wcnt
!= 0) {
707 /* like kstat_waitq_exit */
708 hrtime_t wfix
= cur_time
- kiop
->wlastupdate
;
709 kiop
->wlastupdate
= cur_time
;
710 kiop
->wlentime
+= kiop
->wcnt
* wfix
;
714 if (kiop
->rcnt
!= 0) {
715 /* like kstat_runq_exit */
716 hrtime_t rfix
= cur_time
- kiop
->rlastupdate
;
717 kiop
->rlastupdate
= cur_time
;
718 kiop
->rlentime
+= kiop
->rcnt
* rfix
;
723 case KSTAT_TYPE_NAMED
:
725 * Massage any long strings in at the end of the buffer
727 if (ksp
->ks_data_size
> namedsz
) {
729 kstat_named_t
*knp
= buf
;
730 char *dst
= (char *)(knp
+ ksp
->ks_ndata
);
732 * Copy strings and update pointers
734 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
735 if (knp
->data_type
== KSTAT_DATA_STRING
&&
736 KSTAT_NAMED_STR_PTR(knp
) != NULL
) {
737 bcopy(KSTAT_NAMED_STR_PTR(knp
), dst
,
738 KSTAT_NAMED_STR_BUFLEN(knp
));
739 KSTAT_NAMED_STR_PTR(knp
) = dst
;
740 dst
+= KSTAT_NAMED_STR_BUFLEN(knp
);
743 ASSERT(dst
<= ((char *)buf
+ ksp
->ks_data_size
));
751 header_kstat_update(kstat_t
*header_ksp
, int rw
)
755 avl_tree_t
*t
= &kstat_avl_bykid
;
758 if (rw
== KSTAT_WRITE
)
761 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
763 zoneid
= getzoneid();
764 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
765 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
766 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
770 header_ksp
->ks_ndata
= nkstats
;
771 header_ksp
->ks_data_size
= nkstats
* sizeof (kstat_t
);
776 * Copy out the data section of kstat 0, which consists of the list
777 * of all kstat headers. By specification, these headers must be
778 * copied out in order of increasing KID.
781 header_kstat_snapshot(kstat_t
*header_ksp
, void *buf
, int rw
)
784 avl_tree_t
*t
= &kstat_avl_bykid
;
787 header_ksp
->ks_snaptime
= gethrtime();
789 if (rw
== KSTAT_WRITE
)
792 ASSERT(MUTEX_HELD(&kstat_chain_lock
));
794 zoneid
= getzoneid();
795 for (e
= avl_first(t
); e
!= NULL
; e
= avl_walk(t
, e
, AVL_AFTER
)) {
796 if (kstat_zone_find((kstat_t
*)e
, zoneid
) &&
797 (e
->e_ks
.ks_flags
& KSTAT_FLAG_INVALID
) == 0) {
798 bcopy(&e
->e_ks
, buf
, sizeof (kstat_t
));
799 buf
= (char *)buf
+ sizeof (kstat_t
);
808 system_misc_kstat_update(kstat_t
*ksp
, int rw
)
811 int *loadavgp
= &avenrun
[0];
812 time_t zone_boot_time
;
814 hrtime_t zone_hrtime
;
817 if (rw
== KSTAT_WRITE
)
820 if (!INGLOBALZONE(curproc
)) {
822 * Here we grab cpu_lock which is OK as long as no-one in the
823 * future attempts to lookup this particular kstat
824 * (unix:0:system_misc) while holding cpu_lock.
826 mutex_enter(&cpu_lock
);
827 if (pool_pset_enabled()) {
828 myncpus
= zone_ncpus_get(curproc
->p_zone
);
831 mutex_exit(&cpu_lock
);
832 loadavgp
= &curproc
->p_zone
->zone_avenrun
[0];
835 if (INGLOBALZONE(curproc
)) {
836 zone_boot_time
= boot_time
;
837 zone_lbolt
= ddi_get_lbolt();
840 zone_boot_time
= curproc
->p_zone
->zone_boot_time
;
842 zone_hrtime
= gethrtime();
843 zone_lbolt
= (clock_t)(NSEC_TO_TICK(zone_hrtime
) -
844 NSEC_TO_TICK(curproc
->p_zone
->zone_zsched
->p_mstart
));
845 mutex_enter(&curproc
->p_zone
->zone_nlwps_lock
);
846 zone_nproc
= curproc
->p_zone
->zone_nprocs
;
847 mutex_exit(&curproc
->p_zone
->zone_nlwps_lock
);
850 system_misc_kstat
.ncpus
.value
.ui32
= (uint32_t)myncpus
;
851 system_misc_kstat
.lbolt
.value
.ui32
= (uint32_t)zone_lbolt
;
852 system_misc_kstat
.deficit
.value
.ui32
= (uint32_t)deficit
;
853 system_misc_kstat
.clk_intr
.value
.ui32
= (uint32_t)zone_lbolt
;
854 system_misc_kstat
.vac
.value
.ui32
= (uint32_t)vac
;
855 system_misc_kstat
.nproc
.value
.ui32
= (uint32_t)zone_nproc
;
856 system_misc_kstat
.avenrun_1min
.value
.ui32
= (uint32_t)loadavgp
[0];
857 system_misc_kstat
.avenrun_5min
.value
.ui32
= (uint32_t)loadavgp
[1];
858 system_misc_kstat
.avenrun_15min
.value
.ui32
= (uint32_t)loadavgp
[2];
859 system_misc_kstat
.boot_time
.value
.ui32
= (uint32_t)
861 system_misc_kstat
.nsec_per_tick
.value
.ui32
= (uint32_t)
867 extern caddr_t econtig32
;
869 extern caddr_t econtig
;
874 system_pages_kstat_update(kstat_t
*ksp
, int rw
)
876 kobj_stat_t kobj_stat
;
878 if (rw
== KSTAT_WRITE
) {
882 kobj_stat_get(&kobj_stat
);
883 system_pages_kstat
.physmem
.value
.ul
= (ulong_t
)physmem
;
884 system_pages_kstat
.nalloc
.value
.ul
= kobj_stat
.nalloc
;
885 system_pages_kstat
.nfree
.value
.ul
= kobj_stat
.nfree
;
886 system_pages_kstat
.nalloc_calls
.value
.ul
= kobj_stat
.nalloc_calls
;
887 system_pages_kstat
.nfree_calls
.value
.ul
= kobj_stat
.nfree_calls
;
888 system_pages_kstat
.kernelbase
.value
.ul
= (ulong_t
)KERNELBASE
;
892 * kstat should REALLY be modified to also report kmem64_base and
893 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
894 * [ kernelbase .. econtig ] no longer is truly reflective of the
895 * kernel's vallocs...
897 system_pages_kstat
.econtig
.value
.ul
= (ulong_t
)econtig32
;
899 system_pages_kstat
.econtig
.value
.ul
= (ulong_t
)econtig
;
902 system_pages_kstat
.freemem
.value
.ul
= (ulong_t
)freemem
;
903 system_pages_kstat
.availrmem
.value
.ul
= (ulong_t
)availrmem
;
904 system_pages_kstat
.lotsfree
.value
.ul
= (ulong_t
)lotsfree
;
905 system_pages_kstat
.desfree
.value
.ul
= (ulong_t
)desfree
;
906 system_pages_kstat
.minfree
.value
.ul
= (ulong_t
)minfree
;
907 system_pages_kstat
.fastscan
.value
.ul
= (ulong_t
)fastscan
;
908 system_pages_kstat
.slowscan
.value
.ul
= (ulong_t
)slowscan
;
909 system_pages_kstat
.nscan
.value
.ul
= (ulong_t
)nscan
;
910 system_pages_kstat
.desscan
.value
.ul
= (ulong_t
)desscan
;
911 system_pages_kstat
.pagesfree
.value
.ul
= (ulong_t
)freemem
;
912 system_pages_kstat
.pageslocked
.value
.ul
= (ulong_t
)(availrmem_initial
-
914 system_pages_kstat
.pagestotal
.value
.ul
= (ulong_t
)total_pages
;
916 * pp_kernel represents total pages used by the kernel since the
917 * startup. This formula takes into account the boottime kernel
918 * footprint and also considers the availrmem changes because of
919 * user explicit page locking.
921 system_pages_kstat
.pp_kernel
.value
.ul
= (ulong_t
)(physinstalled
-
922 obp_pages
- availrmem
- k_anoninfo
.ani_mem_resv
-
923 anon_segkp_pages_locked
- pages_locked
-
924 pages_claimed
- pages_useclaim
);
930 kstat_create(const char *ks_module
, int ks_instance
, const char *ks_name
,
931 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
)
933 return (kstat_create_zone(ks_module
, ks_instance
, ks_name
, ks_class
,
934 ks_type
, ks_ndata
, ks_flags
, ALL_ZONES
));
938 * Allocate and initialize a kstat structure. Or, if a dormant kstat with
939 * the specified name exists, reactivate it. Returns a pointer to the kstat
940 * on success, NULL on failure. The kstat will not be visible to the
941 * kstat driver until kstat_install().
944 kstat_create_zone(const char *ks_module
, int ks_instance
, const char *ks_name
,
945 const char *ks_class
, uchar_t ks_type
, uint_t ks_ndata
, uchar_t ks_flags
,
952 char namebuf
[KSTAT_STRLEN
+ 16];
954 if (avl_numnodes(&kstat_avl_bykid
) == 0) {
955 avl_create(&kstat_avl_bykid
, kstat_compare_bykid
,
956 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_bykid
));
958 avl_create(&kstat_avl_byname
, kstat_compare_byname
,
959 sizeof (ekstat_t
), offsetof(struct ekstat
, e_avl_byname
));
963 * If ks_name == NULL, set the ks_name to <module><instance>.
965 if (ks_name
== NULL
) {
966 char buf
[KSTAT_STRLEN
];
967 kstat_set_string(buf
, ks_module
);
968 (void) sprintf(namebuf
, "%s%d", buf
, ks_instance
);
973 * Make sure it's a valid kstat data type
975 if (ks_type
>= KSTAT_NUM_TYPES
) {
976 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
977 "invalid kstat type %d",
978 ks_module
, ks_instance
, ks_name
, ks_type
);
983 * Don't allow persistent virtual kstats -- it makes no sense.
984 * ks_data points to garbage when the client goes away.
986 if ((ks_flags
& KSTAT_FLAG_PERSISTENT
) &&
987 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
988 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
989 "cannot create persistent virtual kstat",
990 ks_module
, ks_instance
, ks_name
);
995 * Don't allow variable-size physical kstats, since the framework's
996 * memory allocation for physical kstat data is fixed at creation time.
998 if ((ks_flags
& KSTAT_FLAG_VAR_SIZE
) &&
999 !(ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
1000 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1001 "cannot create variable-size physical kstat",
1002 ks_module
, ks_instance
, ks_name
);
1007 * Make sure the number of data fields is within legal range
1009 if (ks_ndata
< kstat_data_type
[ks_type
].min_ndata
||
1010 ks_ndata
> kstat_data_type
[ks_type
].max_ndata
) {
1011 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1012 "ks_ndata=%d out of range [%d, %d]",
1013 ks_module
, ks_instance
, ks_name
, (int)ks_ndata
,
1014 kstat_data_type
[ks_type
].min_ndata
,
1015 kstat_data_type
[ks_type
].max_ndata
);
1019 ks_data_size
= kstat_data_type
[ks_type
].size
* ks_ndata
;
1022 * If the named kstat already exists and is dormant, reactivate it.
1024 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1026 if (!(ksp
->ks_flags
& KSTAT_FLAG_DORMANT
)) {
1028 * The named kstat exists but is not dormant --
1029 * this is a kstat namespace collision.
1033 "kstat_create('%s', %d, '%s'): namespace collision",
1034 ks_module
, ks_instance
, ks_name
);
1037 if ((strcmp(ksp
->ks_class
, ks_class
) != 0) ||
1038 (ksp
->ks_type
!= ks_type
) ||
1039 (ksp
->ks_ndata
!= ks_ndata
) ||
1040 (ks_flags
& KSTAT_FLAG_VIRTUAL
)) {
1042 * The name is the same, but the other key parameters
1043 * differ from those of the dormant kstat -- bogus.
1046 cmn_err(CE_WARN
, "kstat_create('%s', %d, '%s'): "
1047 "invalid reactivation of dormant kstat",
1048 ks_module
, ks_instance
, ks_name
);
1052 * Return dormant kstat pointer to caller. As usual,
1053 * the kstat is marked invalid until kstat_install().
1055 ksp
->ks_flags
|= KSTAT_FLAG_INVALID
;
1061 * Allocate memory for the new kstat header and, if this is a physical
1062 * kstat, the data section.
1064 e
= kstat_alloc(ks_flags
& KSTAT_FLAG_VIRTUAL
? 0 : ks_data_size
);
1066 cmn_err(CE_NOTE
, "kstat_create('%s', %d, '%s'): "
1067 "insufficient kernel memory",
1068 ks_module
, ks_instance
, ks_name
);
1073 * Initialize as many fields as we can. The caller may reset
1074 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1075 * Creators of virtual kstats may also reset ks_data. It is
1076 * also up to the caller to initialize the kstat data section,
1077 * if necessary. All initialization must be complete before
1078 * calling kstat_install().
1080 e
->e_zone
.zoneid
= ks_zoneid
;
1081 e
->e_zone
.next
= NULL
;
1084 ksp
->ks_crtime
= gethrtime();
1085 kstat_set_string(ksp
->ks_module
, ks_module
);
1086 ksp
->ks_instance
= ks_instance
;
1087 kstat_set_string(ksp
->ks_name
, ks_name
);
1088 ksp
->ks_type
= ks_type
;
1089 kstat_set_string(ksp
->ks_class
, ks_class
);
1090 ksp
->ks_flags
= ks_flags
| KSTAT_FLAG_INVALID
;
1091 if (ks_flags
& KSTAT_FLAG_VIRTUAL
)
1092 ksp
->ks_data
= NULL
;
1094 ksp
->ks_data
= (void *)(e
+ 1);
1095 ksp
->ks_ndata
= ks_ndata
;
1096 ksp
->ks_data_size
= ks_data_size
;
1097 ksp
->ks_snaptime
= ksp
->ks_crtime
;
1098 ksp
->ks_update
= default_kstat_update
;
1099 ksp
->ks_private
= NULL
;
1100 ksp
->ks_snapshot
= default_kstat_snapshot
;
1101 ksp
->ks_lock
= NULL
;
1103 mutex_enter(&kstat_chain_lock
);
1106 * Add our kstat to the AVL trees.
1108 if (avl_find(&kstat_avl_byname
, e
, &where
) != NULL
) {
1109 mutex_exit(&kstat_chain_lock
);
1111 "kstat_create('%s', %d, '%s'): namespace collision",
1112 ks_module
, ks_instance
, ks_name
);
1116 avl_insert(&kstat_avl_byname
, e
, where
);
1119 * Loop around until we find an unused KID.
1122 ksp
->ks_kid
= kstat_chain_id
++;
1123 } while (avl_find(&kstat_avl_bykid
, e
, &where
) != NULL
);
1124 avl_insert(&kstat_avl_bykid
, e
, where
);
1126 mutex_exit(&kstat_chain_lock
);
1132 * Activate a fully initialized kstat and make it visible to /dev/kstat.
1135 kstat_install(kstat_t
*ksp
)
1137 zoneid_t zoneid
= ((ekstat_t
*)ksp
)->e_zone
.zoneid
;
1140 * If this is a variable-size kstat, it MUST provide kstat data locking
1141 * to prevent data-size races with kstat readers.
1143 if ((ksp
->ks_flags
& KSTAT_FLAG_VAR_SIZE
) && ksp
->ks_lock
== NULL
) {
1144 panic("kstat_install('%s', %d, '%s'): "
1145 "cannot create variable-size kstat without data lock",
1146 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1149 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1150 cmn_err(CE_WARN
, "kstat_install(%p): does not exist",
1155 if (ksp
->ks_type
== KSTAT_TYPE_NAMED
&& ksp
->ks_data
!= NULL
) {
1157 kstat_named_t
*knp
= KSTAT_NAMED_PTR(ksp
);
1159 for (i
= 0; i
< ksp
->ks_ndata
; i
++, knp
++) {
1160 if (knp
->data_type
== KSTAT_DATA_STRING
) {
1161 ksp
->ks_flags
|= KSTAT_FLAG_LONGSTRINGS
;
/*
 * The default snapshot routine does not handle KSTAT_WRITE
 * for named kstats that contain long strings (KSTAT_DATA_STRING).
 */
1169 if ((ksp
->ks_flags
& KSTAT_FLAG_LONGSTRINGS
) &&
1170 (ksp
->ks_flags
& KSTAT_FLAG_WRITABLE
) &&
1171 (ksp
->ks_snapshot
== default_kstat_snapshot
)) {
1172 panic("kstat_install('%s', %d, '%s'): "
1173 "named kstat containing KSTAT_DATA_STRING "
1174 "is writable but uses default snapshot routine",
1175 ksp
->ks_module
, ksp
->ks_instance
, ksp
->ks_name
);
1179 if (ksp
->ks_flags
& KSTAT_FLAG_DORMANT
) {
1182 * We are reactivating a dormant kstat. Initialize the
1183 * caller's underlying data to the value it had when the
1184 * kstat went dormant, and mark the kstat as active.
1185 * Grab the provider's kstat lock if it's not already held.
1187 kmutex_t
*lp
= ksp
->ks_lock
;
1188 if (lp
!= NULL
&& MUTEX_NOT_HELD(lp
)) {
1190 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1193 (void) KSTAT_UPDATE(ksp
, KSTAT_WRITE
);
1195 ksp
->ks_flags
&= ~KSTAT_FLAG_DORMANT
;
/*
 * Now that the kstat is active, make it visible to the kstat driver.
 * When copying out kstats the count is determined in
 * header_kstat_update() and actually copied into kbuf in
 * header_kstat_snapshot().  kstat_chain_lock is held across the two
 * calls to ensure that this list doesn't change.  Thus, we need to
 * also take the lock to ensure that we don't copy the new kstat
 * in the 2nd pass and overrun the buf.
 */
1207 mutex_enter(&kstat_chain_lock
);
1208 ksp
->ks_flags
&= ~KSTAT_FLAG_INVALID
;
1209 mutex_exit(&kstat_chain_lock
);
1214 * Remove a kstat from the system. Or, if it's a persistent kstat,
1215 * just update the data and mark it as dormant.
1218 kstat_delete(kstat_t
*ksp
)
1221 ekstat_t
*e
= (ekstat_t
*)ksp
;
1225 ASSERT(ksp
!= NULL
);
1230 zoneid
= e
->e_zone
.zoneid
;
1234 if (lp
!= NULL
&& MUTEX_HELD(lp
)) {
1235 panic("kstat_delete(%p): caller holds data lock %p",
1236 (void *)ksp
, (void *)lp
);
1239 if (kstat_hold_bykid(ksp
->ks_kid
, zoneid
) != ksp
) {
1240 cmn_err(CE_WARN
, "kstat_delete(%p): does not exist",
1245 if (ksp
->ks_flags
& KSTAT_FLAG_PERSISTENT
) {
1247 * Update the data one last time, so that all activity
1248 * prior to going dormant has been accounted for.
1251 (void) KSTAT_UPDATE(ksp
, KSTAT_READ
);
1255 * Mark the kstat as dormant and restore caller-modifiable
1256 * fields to default values, so the kstat is readable during
1257 * the dormant phase.
1259 ksp
->ks_flags
|= KSTAT_FLAG_DORMANT
;
1260 ksp
->ks_lock
= NULL
;
1261 ksp
->ks_update
= default_kstat_update
;
1262 ksp
->ks_private
= NULL
;
1263 ksp
->ks_snapshot
= default_kstat_snapshot
;
1269 * Remove the kstat from the framework's AVL trees,
1270 * free the allocated memory, and increment kstat_chain_id so
1271 * /dev/kstat clients can detect the event.
1273 mutex_enter(&kstat_chain_lock
);
1274 avl_remove(&kstat_avl_bykid
, e
);
1275 avl_remove(&kstat_avl_byname
, e
);
1277 mutex_exit(&kstat_chain_lock
);
1279 kz
= e
->e_zone
.next
;
1280 while (kz
!= NULL
) {
1281 kstat_zone_t
*t
= kz
;
1284 kmem_free(t
, sizeof (*t
));
1291 kstat_delete_byname_zone(const char *ks_module
, int ks_instance
,
1292 const char *ks_name
, zoneid_t ks_zoneid
)
1296 ksp
= kstat_hold_byname(ks_module
, ks_instance
, ks_name
, ks_zoneid
);
1304 kstat_delete_byname(const char *ks_module
, int ks_instance
, const char *ks_name
)
1306 kstat_delete_byname_zone(ks_module
, ks_instance
, ks_name
, ALL_ZONES
);
1310 * The sparc V9 versions of these routines can be much cheaper than
1311 * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1312 * For simplicity, however, we always feed the C versions to lint.
1314 #if !defined(__sparc) || defined(lint) || defined(__lint)
1317 kstat_waitq_enter(kstat_io_t
*kiop
)
1319 hrtime_t
new, delta
;
1322 new = gethrtime_unscaled();
1323 delta
= new - kiop
->wlastupdate
;
1324 kiop
->wlastupdate
= new;
1325 wcnt
= kiop
->wcnt
++;
1327 kiop
->wlentime
+= delta
* wcnt
;
1328 kiop
->wtime
+= delta
;
1333 kstat_waitq_exit(kstat_io_t
*kiop
)
1335 hrtime_t
new, delta
;
1338 new = gethrtime_unscaled();
1339 delta
= new - kiop
->wlastupdate
;
1340 kiop
->wlastupdate
= new;
1341 wcnt
= kiop
->wcnt
--;
1342 ASSERT((int)wcnt
> 0);
1343 kiop
->wlentime
+= delta
* wcnt
;
1344 kiop
->wtime
+= delta
;
1348 kstat_runq_enter(kstat_io_t
*kiop
)
1350 hrtime_t
new, delta
;
1353 new = gethrtime_unscaled();
1354 delta
= new - kiop
->rlastupdate
;
1355 kiop
->rlastupdate
= new;
1356 rcnt
= kiop
->rcnt
++;
1358 kiop
->rlentime
+= delta
* rcnt
;
1359 kiop
->rtime
+= delta
;
1364 kstat_runq_exit(kstat_io_t
*kiop
)
1366 hrtime_t
new, delta
;
1369 new = gethrtime_unscaled();
1370 delta
= new - kiop
->rlastupdate
;
1371 kiop
->rlastupdate
= new;
1372 rcnt
= kiop
->rcnt
--;
1373 ASSERT((int)rcnt
> 0);
1374 kiop
->rlentime
+= delta
* rcnt
;
1375 kiop
->rtime
+= delta
;
1379 kstat_waitq_to_runq(kstat_io_t
*kiop
)
1381 hrtime_t
new, delta
;
1384 new = gethrtime_unscaled();
1386 delta
= new - kiop
->wlastupdate
;
1387 kiop
->wlastupdate
= new;
1388 wcnt
= kiop
->wcnt
--;
1389 ASSERT((int)wcnt
> 0);
1390 kiop
->wlentime
+= delta
* wcnt
;
1391 kiop
->wtime
+= delta
;
1393 delta
= new - kiop
->rlastupdate
;
1394 kiop
->rlastupdate
= new;
1395 rcnt
= kiop
->rcnt
++;
1397 kiop
->rlentime
+= delta
* rcnt
;
1398 kiop
->rtime
+= delta
;
1403 kstat_runq_back_to_waitq(kstat_io_t
*kiop
)
1405 hrtime_t
new, delta
;
1408 new = gethrtime_unscaled();
1410 delta
= new - kiop
->rlastupdate
;
1411 kiop
->rlastupdate
= new;
1412 rcnt
= kiop
->rcnt
--;
1413 ASSERT((int)rcnt
> 0);
1414 kiop
->rlentime
+= delta
* rcnt
;
1415 kiop
->rtime
+= delta
;
1417 delta
= new - kiop
->wlastupdate
;
1418 kiop
->wlastupdate
= new;
1419 wcnt
= kiop
->wcnt
++;
1421 kiop
->wlentime
+= delta
* wcnt
;
1422 kiop
->wtime
+= delta
;
1429 kstat_timer_start(kstat_timer_t
*ktp
)
1431 ktp
->start_time
= gethrtime();
1435 kstat_timer_stop(kstat_timer_t
*ktp
)
1438 u_longlong_t num_events
;
1440 ktp
->stop_time
= etime
= gethrtime();
1441 etime
-= ktp
->start_time
;
1442 num_events
= ktp
->num_events
;
1443 if (etime
< ktp
->min_time
|| num_events
== 0)
1444 ktp
->min_time
= etime
;
1445 if (etime
> ktp
->max_time
)
1446 ktp
->max_time
= etime
;
1447 ktp
->elapsed_time
+= etime
;
1448 ktp
->num_events
= num_events
+ 1;