 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2011 Joyent, Inc.  All rights reserved.
#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: "); \

#define	KM_ALLOCATED		0x01
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */

static int mdb_debug_level = 0;
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");
kmem_cache_walk_init(mdb_walk_state_t *wsp)

	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
		mdb_warn("couldn't find kmem_caches");

	wsp->walk_addr = (uintptr_t)sym.st_value;

	return (list_walk_init_named(wsp, "cache list", "cache"));
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");

	wsp->walk_data = (void *)wsp->walk_addr;
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)

	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;

	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
kmem_slab_check(void *p, uintptr_t saddr, void *arg)

	uintptr_t caddr = (uintptr_t)arg;
	if ((uintptr_t)sp->slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    saddr, caddr, sp->slab_cache);
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)

	int rc = kmem_slab_check(p, saddr, arg);

	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
		mdb_warn("slab %p is not a partial slab\n", saddr);
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)

	int rc = kmem_slab_check(p, saddr, arg);

	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
		mdb_warn("slab %p is not completely allocated\n", saddr);
	uintptr_t kns_cache_addr;

kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)

	kmem_nth_slab_t *chkp = arg;

	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);

	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)

	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_complete_slab_check, (void *)caddr));
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)

	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_partial_slabs));

	return (avl_walk_init_checked(wsp, "slab list", "slab",
	    kmem_partial_slab_check, (void *)caddr));
kmem_slab_walk_init(mdb_walk_state_t *wsp)

	uintptr_t caddr = wsp->walk_addr;

		mdb_warn("kmem_slab doesn't support global walks\n");

	combined_walk_init(wsp);
	combined_walk_add(wsp,
	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
	combined_walk_add(wsp,
	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)

	uintptr_t caddr = wsp->walk_addr;
	kmem_nth_slab_t *chk;

	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
	chk->kns_cache_addr = caddr;
	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_nth_slab_check, chk));
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)

	uintptr_t caddr = wsp->walk_addr;

		mdb_warn("kmem_slab_partial doesn't support global walks\n");

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);

	combined_walk_init(wsp);

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache. So
	 * if there are *no* partial slabs, report the first full slab, if
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if (c.cache_partial_slabs.avl_numnodes == 0) {
		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
		    list_walk_step, list_walk_fini);
		combined_walk_add(wsp, kmem_partial_slab_walk_init,
		    avl_walk_step, avl_walk_fini);
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)

	const char *filter = NULL;

	if (mdb_getopts(ac, argv,
	    'n', MDB_OPT_STR, &filter,

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);

	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
kmem_cache_help(void)

	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_printf("%<b>OPTIONS%</b>\n");
	"  name of kmem cache (or matching partial name)\n"
	"Column\tDescription\n"
	"ADDR\t\taddress of kmem cache\n"
	"NAME\t\tname of kmem cache\n"
	"FLAG\t\tvarious cache state flags\n"
	"CFLAG\t\tcache creation flags\n"
	"BUFSIZE\tobject size in bytes\n"
	"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
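/*
 * Illustrative (hypothetical) invocation of the ::kmem_cache dcmd above from
 * within mdb -k, using the -n partial-name filter parsed by kmem_cache();
 * the header line is the one printed under DCMD_HDRSPEC above:
 *
 *	> ::kmem_cache -n kmem_alloc
 *	ADDR             NAME                      FLAG  CFLAG  BUFSIZE  BUFTOTL
 *	...
 */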
#define	LABEL_WIDTH	11

kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)

	const int *distarray;

	buckets = buffers_per_slab;

	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			minbucketsize = (buffers_per_slab - 1);

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);

	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output.  Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)

kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,

	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at least
	 * one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */

typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)

	kmem_slab_usage_t *ksu;

	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
		ks->ks_usage = usage;
		ks->ks_usage_len = len;

	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)

	kmem_slab_stats_t stats;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,

	if ((maxbuckets != 1) || (minbucketsize != 0)) {

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
			mdb_warn("can't walk kmem_cache");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);

		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));

	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {
		boolean_t is_slab = B_FALSE;
		const char *walker_name;

		cb = (mdb_walk_cb_t)kmem_first_partial_slab;
		walker_name = "kmem_slab_partial";
		cb = (mdb_walk_cb_t)kmem_first_slab;
		walker_name = "kmem_slab";
		(void) mdb_pwalk(walker_name, cb, &is_slab, addr);

	bzero(&stats, sizeof (kmem_slab_stats_t));
	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

	if (c.cache_buftotal == 0) {
		uint64_t n = stats.ks_unused_buffers * 10000;
		pct = (int)(n / c.cache_buftotal);
		tenths_pct = pct - ((pct / 100) * 100);
		tenths_pct = (tenths_pct + 5) / 10;	/* round nearest tenth */
		if (tenths_pct == 10) {

	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);

	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (opt_v && (stats.ks_partial_slabs > 0)) {
		kmem_slab_usage_t *ksu;

		mdb_printf("  %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
kmem_slabs_help(void)

	    "Display slab usage per kmem cache.\n\n");
	mdb_printf("%<b>OPTIONS%</b>\n");
	"  name of kmem cache (or matching partial name)\n"
	"  exact name of kmem cache\n"
	"  Print a distribution of allocated buffers per slab using at\n"
	"  most maxbins bins. The first bin is reserved for completely\n"
	"  allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
	"  effect as specifying the maximum allocated buffers per slab\n"
	"  or setting minbinsize to 1 (-B 1).\n"
	"  Print a distribution of allocated buffers per slab, making\n"
	"  all bins (except the first, reserved for completely allocated\n"
	"  slabs) at least minbinsize buffers apart.\n"
	"  -v  verbose output: List the allocated buffer count of each partial\n"
	"  slab on the free list in order from front to back to show how\n"
	"  closely the slabs are ordered by usage. For example\n"
	"  10 complete, 3 partial (8): 7 3 1\n"
	"  means there are thirteen slabs with eight buffers each, including\n"
	"  three partially allocated slabs with less than all eight buffers\n"
	"  Buffer allocations are always from the front of the partial slab\n"
	"  list. When a buffer is freed from a completely used slab, that\n"
	"  slab is added to the front of the partial slab list. Assuming\n"
	"  that all buffers are equally likely to be freed soon, the\n"
	"  desired order of partial slabs is most-used at the front of the\n"
	"  list and least-used at the back (as in the example above).\n"
	"  However, if a slab contains an allocated buffer that will not\n"
	"  soon be freed, it would be better for that slab to be at the\n"
	"  front where all of its buffers can be allocated. Taking a slab\n"
	"  off the partial slab list (either with all buffers freed or all\n"
	"  buffers allocated) reduces cache fragmentation.\n"
	"  A slab's allocated buffer count representing a partial slab (9 in\n"
	"  the example below) may be marked as follows:\n"
	"  9* An asterisk indicates that kmem has marked the slab non-\n"
	"  reclaimable because the kmem client refused to move one of the\n"
	"  slab's buffers. Since kmem does not expect to completely free the\n"
	"  slab, it moves it to the front of the list in the hope of\n"
	"  completely allocating it instead. A slab marked with an asterisk\n"
	"  stays marked for as long as it remains on the partial slab list.\n"
	"Column\t\tDescription\n"
	"Cache Name\t\tname of kmem cache\n"
	"Slabs\t\t\ttotal slab count\n"
	"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
	"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
	"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
	"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
	"\t\t\t  for accounting structures (debug mode), slab\n"
	"\t\t\t  coloring (incremental small offsets to stagger\n"
	"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
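/*
 * Illustrative invocations of the ::kmem_slabs dcmd documented above
 * (cache address hypothetical), matching the -v/-b/-B options it parses:
 *
 *	> ::kmem_slabs                      (summary line for every cache)
 *	> <cache-addr>::kmem_slabs -v       (per-partial-slab usage list)
 *	> <cache-addr>::kmem_slabs -b 10    (distribution with at most 10 bins)
 */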
addrcmp(const void *lhs, const void *rhs)

	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)

	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	kmem_bufctl_t kmhw_cur;

kmem_hash_walk_init(mdb_walk_state_t *wsp)

	kmem_hash_walk_t *kmhw;
	uintptr_t haddr, addr = wsp->walk_addr;

		mdb_warn("kmem_hash doesn't support global walks\n");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));

	wsp->walk_data = kmhw;
kmem_hash_walk_step(mdb_walk_state_t *wsp)

	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));

kmem_hash_walk_fini(mdb_walk_state_t *wsp)

	kmem_hash_walk_t *kmhw = wsp->walk_data;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.

kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)

	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
kmem_get_magsize(const kmem_cache_t *cp)

	uintptr_t addr = (uintptr_t)cp->cache_magtype;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);

	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);

	return (mt.mt_magsize);
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)

	*est -= (sp->slab_chunks - sp->slab_refcnt);

/*
 * Returns an upper bound on the number of allocated buffers in a given
 */
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
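/*
 * Note: READMAG_ROUNDS() is not a self-contained statement; it expands inside
 * kmem_read_magazines() below and relies on that function's locals (mp,
 * magbsize, kmp, maglist, magcnt, magmax, i) being in scope at the expansion
 * site.
 */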
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)

	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	size_t magsize, magmax, magbsize;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 */
	magsize = kmem_get_magsize(cp);

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		if (kmp == cp->cache_full.ml_list)
			break;	/* cache_full list loop detected */

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 *
	 * In order to prevent inconsistent dumps, rounds and prounds
	 * are copied aside before dumping begins.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
		short rounds, prounds;

		if (KMEM_DUMPCC(ccp)) {
			rounds = ccp->cc_dump_rounds;
			prounds = ccp->cc_dump_prounds;
			rounds = ccp->cc_rounds;
			prounds = ccp->cc_prounds;

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", rounds));
			READMAG_ROUNDS(rounds);

		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			READMAG_ROUNDS(prounds);

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;

	if (!(alloc_flags & UM_GC)) {
		mdb_free(mp, magbsize);
		mdb_free(maglist, magmax * sizeof (void *));
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)

	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));

bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)

	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
typedef struct kmem_walk {
	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	char *kmw_valid;		/* to keep track of freed buffers */
	char *kmw_ubase;		/* buffer for slab data */
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

		mdb_warn("kmem walk doesn't support global walks\n");

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid kmem_cache_t\n", addr);

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		mdb_free(cp, csize);

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)

	/*
	 * We have all of the buffers from the magazines; if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";
		kmw->kmw_type |= KM_HASH;

	/*
	 * If we are walking freed buffers, we only need the
	 * magazine layer plus the partially allocated slabs.
	 * To walk allocated buffers, we need all of the slabs.
	 */
	if (type & KM_ALLOCATED)
		layered = "kmem_slab";
		layered = "kmem_slab_partial";

	/*
	 * for small-slab caches, we read in the entire slab.  For
	 * freed buffers, we can just walk the freelist.  For
	 * allocated buffers, we use a 'valid' array to track
	 * the freed buffers.
	 */
	if (!(cp->cache_flags & KMF_HASH)) {
		chunksize = cp->cache_chunksize;
		slabsize = cp->cache_slabsize;

		kmw->kmw_ubase = mdb_alloc(slabsize +
		    sizeof (kmem_bufctl_t), UM_SLEEP);

		if (type & KM_ALLOCATED)
			mdb_alloc(slabsize / chunksize, UM_SLEEP);

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);

	if (status == WALK_ERR) {
			mdb_free(kmw->kmw_valid, slabsize / chunksize);
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		if (kmw->kmw_maglist)
			mdb_free(kmw->kmw_maglist,
			    kmw->kmw_max * sizeof (uintptr_t));

		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;

	if (status == WALK_ERR)
		mdb_free(cp, csize);
kmem_walk_step(mdb_walk_state_t *wsp)

	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;
	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;
	uintptr_t chunksize, slabsize;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer.  We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		    bsearch(&buf, maglist, magcnt, sizeof (void *),

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;	/* only do this once */
		for (i = 0; i < magcnt; i++) {

			if (type & KM_BUFCTL) {
				if (cp->cache_flags & KMF_BUFTAG) {
					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
					out = (uintptr_t)tag.bt_bufctl;
					if (kmem_hash_lookup(cp, addr, buf,
				ret = bufctl_walk_callback(cp, wsp, out);
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);

			if (ret != WALK_NEXT)

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer, our
	 * refcnt could be one higher than it ought to be.  So we check
	 * one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {

		dprintf(("bcp is %p\n", bcp));

		    "slab %p in cache %p freelist too short by %d\n",
		    sp, addr, chunks - i);

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
			/*
			 * Otherwise the buffer is (or should be) in the slab
			 * that we've read in; determine its offset in the
			 * slab, validate that it's not corrupt, and add to
			 * our base address to find the umem_bufctl_t.  (Note
			 * that we don't need to add the size of the bufctl
			 * to our offset calculation because of the slop that's
			 * allocated for the buffer at ubase.)
			 */
			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;

			if (offs > chunks * chunksize) {
				mdb_warn("found corrupt bufctl ptr %p"
				    " in slab %p in cache %p\n", bcp,
				    wsp->walk_addr, addr);

			bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
		buf = KMEM_BUF(cp, bcp);

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 */

			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);

			if (ret != WALK_NEXT)

	dprintf(("slab %p in cache %p freelist too long (%p)\n",

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 */

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

			continue;	/* on slab freelist */

		    bsearch(&buf, maglist, magcnt, sizeof (void *),
			continue;	/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
kmem_walk_fini(mdb_walk_state_t *wsp)

	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)

	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & KMC_NOTOUCH)

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
kmem_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("kmem", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED));

bufctl_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("bufctl", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));

freemem_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE));

freemem_constructed_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem_constructed", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));

freectl_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));

freectl_constructed_walk_init(mdb_walk_state_t *wsp)

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl_constructed", wsp);
	return (kmem_walk_init_common(wsp,
	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
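/*
 * Illustrative usage of the walkers registered above (cache address
 * hypothetical):
 *
 *	> <cache-addr>::walk kmem           (allocated buffers)
 *	> <cache-addr>::walk freemem        (freed buffers)
 *	> <cache-addr>::walk bufctl         (allocated bufctls; KMF_HASH caches)
 *
 * With no address, each init routine falls back to KMEM_WALK_ALL(), which
 * repeats the walk for every kmem cache.
 */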
typedef struct bufctl_history_walk {
	kmem_cache_t *bhw_cache;
	kmem_slab_t *bhw_slab;
	hrtime_t bhw_timestamp;
} bufctl_history_walk_t;

bufctl_history_walk_init(mdb_walk_state_t *wsp)

	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl; in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;
bufctl_history_walk_step(mdb_walk_state_t *wsp)

	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));

bufctl_history_walk_fini(mdb_walk_state_t *wsp)

	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;
	kmem_bufctl_audit_t **klw_sorted;
	kmem_log_header_t klw_lh;
kmem_log_walk_init(mdb_walk_state_t *wsp)

	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");

		mdb_warn("log is disabled\n");

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;
kmem_log_walk_step(mdb_walk_state_t *wsp)

	kmem_log_walk_t *klw = wsp->walk_data;
	kmem_bufctl_audit_t *bcp;

	if (klw->klw_ndx == klw->klw_maxndx)

	bcp = klw->klw_sorted[klw->klw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));

kmem_log_walk_fini(mdb_walk_state_t *wsp)

	kmem_log_walk_t *klw = wsp->walk_data;

	mdb_free(klw->klw_base, klw->klw_size);
	mdb_free(klw->klw_sorted, klw->klw_maxndx *
	    sizeof (kmem_bufctl_audit_t *));
	mdb_free(klw, sizeof (kmem_log_walk_t));
typedef struct allocdby_bufctl {
} allocdby_bufctl_t;

typedef struct allocdby_walk {
	const char *abw_walk;
	uintptr_t abw_thread;
	allocdby_bufctl_t *abw_buf;
allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)

	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)

	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
		mdb_warn("couldn't walk bufctl for cache %p", addr);

allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)

	if (lhs->abb_ts < rhs->abb_ts)
	if (lhs->abb_ts > rhs->abb_ts)
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)

	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

allocdby_walk_init(mdb_walk_state_t *wsp)

	return (allocdby_walk_init_common(wsp, "bufctl"));

freedby_walk_init(mdb_walk_state_t *wsp)

	return (allocdby_walk_init_common(wsp, "freectl"));
allocdby_walk_step(mdb_walk_state_t *wsp)

	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;

	if (abw->abw_ndx == abw->abw_nbufs)

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));

allocdby_walk_fini(mdb_walk_state_t *wsp)

	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)

	char c[MDB_SYM_NAMLEN];

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
		if (strncmp(c, "kmem_", 5) == 0)
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);

allocdby_common(uintptr_t addr, uint_t flags, const char *w)

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);

allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)

	return (allocdby_common(addr, flags, "allocdby"));

freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)

	return (allocdby_common(addr, flags, "freedby"));
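/*
 * Illustrative usage of the dcmds above (thread address hypothetical); the
 * column header is the one printed by allocdby_common():
 *
 *	> <thread-addr>::allocdby
 *	BUFCTL           TIMESTAMP CALLER
 *	...
 *
 * ::freedby produces the same layout but walks the "freedby" walker, i.e.
 * bufctls for buffers freed by the given thread.
 */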
 * Return a string describing the address in relation to the given thread's
 *
 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
 *
 * - If the address is above the stack pointer, return an empty string
 *   signifying that the address is active.
 *
 * - If the address is below the stack pointer, and the thread is not on proc,
 *   return " (below sp)".
 *
 * - If the address is below the stack pointer, and the thread is on proc,
 *   return " (possibly below sp)".  Depending on context, we may or may not
 *   have an accurate t_sp.

stack_active(const kthread_t *t, uintptr_t addr)

	if (t->t_state == TS_FREE)
		return (" (inactive interrupt thread)");

	/*
	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
	 * no longer relates to the thread's real stack.
	 */
	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
		panicstk = (uintptr_t)sym.st_value;

		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)

	if (addr >= t->t_sp + STACK_BIAS)

	if (t->t_state == TS_ONPROC)
		return (" (possibly below sp)");

	return (" (below sp)");
 * Additional state for the kmem and vmem ::whatis handlers

typedef struct whatis_info {
	const kmem_cache_t *wi_cache;
	const vmem_t *wi_vmem;
	vmem_t *wi_msb_arena;
	size_t wi_slab_size;
	uint_t wi_slab_found;
	uint_t wi_kmem_lite_count;
/* call one of our dcmd functions with "-v" and the provided address */
whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)

	a.a_type = MDB_TYPE_STRING;
	a.a_un.a_str = "-v";

	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
whatis_print_kmf_lite(uintptr_t btaddr, size_t count)

#define	KMEM_LITE_MAX	16
	pc_t callers[KMEM_LITE_MAX];
	pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
	const char *plural = "";

	/* validate our arguments and read in the buftag */
	if (count == 0 || count > KMEM_LITE_MAX ||
	    mdb_vread(&bt, sizeof (bt), btaddr) == -1)

	/* validate the buffer state and read in the callers */
	stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

	if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE ||
	    mdb_vread(callers, count * sizeof (pc_t),
	    btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)

	/* If there aren't any filled in callers, bail */
	if (callers[0] == uninit)

	plural = (callers[1] == uninit) ? "" : "s";

	/* Everything's done and checked; print them out */

	mdb_printf("recent caller%s: %a", plural, callers[0]);
	for (i = 1; i < count; i++) {
		if (callers[i] == uninit)
		mdb_printf(", %a", callers[i]);
whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,

	mdb_whatis_t *w = wi->wi_w;
	const kmem_cache_t *cp = wi->wi_cache;
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
	int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));

	mdb_whatis_report_object(w, maddr, addr, "");

	if (baddr != 0 && !call_printer)
		mdb_printf("bufctl %p ", baddr);

	mdb_printf("%s from %s",
	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);

	if (baddr != 0 && call_printer) {
		whatis_call_printer(bufctl, baddr);

	/* for KMF_LITE caches, try to print out the previous callers */
	if (!quiet && (cp->cache_flags & KMF_LITE))
		whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;
	size_t size = wi->wi_cache->cache_bufsize;

	while (mdb_whatis_match(w, addr, size, &cur))
		whatis_print_kmem(wi, cur, addr, NULL);

	return (WHATIS_WALKRET(w));

whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;
	uintptr_t addr = (uintptr_t)bcp->bc_addr;
	size_t size = wi->wi_cache->cache_bufsize;

	while (mdb_whatis_match(w, addr, size, &cur))
		whatis_print_kmem(wi, cur, addr, baddr);

	return (WHATIS_WALKRET(w));
whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;
	size_t size = vs->vs_end - vs->vs_start;

	/* We're not interested in anything but alloc and free segments */
	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)

	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
		mdb_whatis_report_object(w, cur, vs->vs_start, "");

		/*
		 * If we're not printing it separately, provide the vmem_seg
		 * pointer if it has a stack trace.
		 */
		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
		    (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
			mdb_printf("vmem_seg %p ", addr);

		mdb_printf("%s from the %s vmem arena",
		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
		    wi->wi_vmem->vm_name);

		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
			whatis_call_printer(vmem_seg, addr);

	return (WHATIS_WALKRET(w));
whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;
	const char *nm = vmem->vm_name;

	int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);

	if (identifier != idspace)

	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching vmem arena %s...\n", nm);

	if (mdb_pwalk("vmem_seg",
	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
		mdb_warn("can't walk vmem_seg for %p", addr);

	return (WHATIS_WALKRET(w));
whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;

	/* It must overlap with the slab data, or it's not interesting */
	if (mdb_whatis_overlaps(w,
	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
		wi->wi_slab_found++;
whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)

	mdb_whatis_t *w = wi->wi_w;
	char *walk, *freewalk;

	int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);

	if (identifier != idspace)

	/* Override the '-b' flag as necessary */
	if (!(c->cache_flags & KMF_HASH))
		do_bufctl = FALSE;	/* no bufctls to walk */
	else if (c->cache_flags & KMF_AUDIT)
		do_bufctl = TRUE;	/* we always want debugging info */
		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);

		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_kmem;

	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching %s...\n", c->cache_name);

	/*
	 * If more than two buffers live on each slab, figure out if we're
	 * interested in anything in any slab before doing the more expensive
	 * kmem/freemem (bufctl/freectl) walkers.
	 */
	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
	if (!(c->cache_flags & KMF_HASH))
		wi->wi_slab_size -= sizeof (kmem_slab_t);

	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
		wi->wi_slab_found = 0;
		if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
			mdb_warn("can't find kmem_slab walker");
		if (wi->wi_slab_found == 0)

	wi->wi_freemem = FALSE;
	if (mdb_pwalk(walk, func, wi, addr) == -1) {
		mdb_warn("can't find %s walker", walk);

	if (mdb_whatis_done(w))

	/*
	 * We have searched for allocated memory; now search for freed memory.
	 */
	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching %s for free memory...\n", c->cache_name);

	wi->wi_freemem = TRUE;
	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
		mdb_warn("can't find %s walker", freewalk);

	return (WHATIS_WALKRET(w));
whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)

	if (c->cache_arena == wi->wi_msb_arena ||
	    (c->cache_cflags & KMC_NOTOUCH))

	return (whatis_walk_cache(addr, c, wi));

whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)

	if (c->cache_arena != wi->wi_msb_arena)

	return (whatis_walk_cache(addr, c, wi));

whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)

	if (c->cache_arena == wi->wi_msb_arena ||
	    !(c->cache_cflags & KMC_NOTOUCH))

	return (whatis_walk_cache(addr, c, wi));
whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)

	/*
	 * Often, one calls ::whatis on an address from a thread structure.
	 * We use this opportunity to short circuit this case...
	 */
	while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
		mdb_whatis_report_object(w, cur, addr,
		    "allocated as a thread structure\n");

	/*
	 * Now check the stack
	 */
	if (t->t_stkbase == NULL)

	/*
	 * This assumes that t_stk is the end of the stack, but it's really
	 * only the initial stack pointer for the thread.  Arguments to the
	 * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
	 * that 't->t_stk::whatis' reports "part of t's stack", we include
	 * t_stk in the range (the "+ 1", below), but the kernel should
	 * really include the full stack bounds where we can find it.
	 */
	saddr = (uintptr_t)t->t_stkbase;
	size = (uintptr_t)t->t_stk - saddr + 1;
	while (mdb_whatis_match(w, saddr, size, &cur))
		mdb_whatis_report_object(w, cur, cur,
		    "in thread %p's stack%s\n", addr, stack_active(t, cur));

	return (WHATIS_WALKRET(w));
whatis_modctl_match(mdb_whatis_t *w, const char *name,
    uintptr_t base, size_t size, const char *where)

	/*
	 * Since we're searching for addresses inside a module, we report
	 */
	while (mdb_whatis_match(w, base, size, &cur))
		mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
static int
whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
{
    char name[MODMAXNAMELEN];
    struct module mod;
    Shdr shdr;

    if (m->mod_mp == NULL)
        return (WALK_NEXT);

    if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
        mdb_warn("couldn't read modctl %p's module", addr);
        return (WALK_NEXT);
    }

    if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
        (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);

    whatis_modctl_match(w, name,
        (uintptr_t)mod.text, mod.text_size, "text segment");
    whatis_modctl_match(w, name,
        (uintptr_t)mod.data, mod.data_size, "data segment");
    whatis_modctl_match(w, name,
        (uintptr_t)mod.bss, mod.bss_size, "bss segment");

    if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
        mdb_warn("couldn't read symbol header for %p's module", addr);
        return (WALK_NEXT);
    }

    whatis_modctl_match(w, name,
        (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
    whatis_modctl_match(w, name,
        (uintptr_t)mod.symspace, mod.symsize, "symtab");

    return (WHATIS_WALKRET(w));
}
static int
whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
{
    uintptr_t cur;
    uintptr_t base = (uintptr_t)seg->pages;
    size_t size = (uintptr_t)seg->epages - base;

    while (mdb_whatis_match(w, base, size, &cur)) {
        /* round our found pointer down to the page_t base. */
        size_t offset = (cur - base) % sizeof (page_t);

        mdb_whatis_report_object(w, cur, cur - offset,
            "allocated as a page structure\n");
    }

    return (WHATIS_WALKRET(w));
}
static int
whatis_run_modules(mdb_whatis_t *w, void *arg)
{
    if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
        mdb_warn("couldn't find modctl walker");
        return (1);
    }
    return (0);
}

static int
whatis_run_threads(mdb_whatis_t *w, void *ignored)
{
    /*
     * Now search all thread stacks.  Yes, this is a little weak; we
     * can save a lot of work by first checking to see if the
     * address is in segkp vs. segkmem.  But hey, computers are fast.
     */
    if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
        mdb_warn("couldn't find thread walker");
        return (1);
    }
    return (0);
}

static int
whatis_run_pages(mdb_whatis_t *w, void *ignored)
{
    if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
        mdb_warn("couldn't find memseg walker");
        return (1);
    }
    return (0);
}
static int
whatis_run_kmem(mdb_whatis_t *w, void *ignored)
{
    whatis_info_t wi;

    bzero(&wi, sizeof (wi));
    wi.wi_w = w;

    if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
        mdb_warn("unable to readvar \"kmem_msb_arena\"");

    if (mdb_readvar(&wi.wi_kmem_lite_count,
        "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
        wi.wi_kmem_lite_count = 0;

    /*
     * We process kmem caches in the following order:
     *
     *	non-KMC_NOTOUCH, non-metadata	(typically the most interesting)
     *	metadata			(can be huge with KMF_AUDIT)
     *	KMC_NOTOUCH, non-metadata	(see kmem_walk_all())
     */
    if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
        &wi) == -1 ||
        mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
        &wi) == -1 ||
        mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
        &wi) == -1) {
        mdb_warn("couldn't find kmem_cache walker");
        return (1);
    }
    return (0);
}
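
/*
 * Note: the three predicates above are disjoint, so each cache is visited by
 * exactly one pass: whatis_walk_metadata takes everything in kmem_msb_arena,
 * and the remaining caches are split between whatis_walk_touch and
 * whatis_walk_notouch by KMC_NOTOUCH.
 */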
static int
whatis_run_vmem(mdb_whatis_t *w, void *ignored)
{
    whatis_info_t wi;

    bzero(&wi, sizeof (wi));
    wi.wi_w = w;

    if (mdb_walk("vmem_postfix",
        (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
        mdb_warn("couldn't find vmem_postfix walker");
        return (1);
    }
    return (0);
}
typedef struct kmem_log_cpu {
    uintptr_t kmc_low;			/* low byte of this CPU's log chunk */
    uintptr_t kmc_high;			/* current high-water mark */
} kmem_log_cpu_t;

typedef struct kmem_log_data {
    uintptr_t kmd_addr;			/* address filter (or 0) */
    kmem_log_cpu_t *kmd_cpu;		/* per-CPU log bounds */
} kmem_log_data_t;

int
kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
    kmem_log_data_t *kmd)
{
    int i;
    kmem_log_cpu_t *kmc = kmd->kmd_cpu;
    size_t bufsize;

    for (i = 0; i < NCPU; i++) {
        if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
            break;
    }

    if (kmd->kmd_addr) {
        if (b->bc_cache == NULL)
            return (WALK_NEXT);

        if (mdb_vread(&bufsize, sizeof (bufsize),
            (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
            mdb_warn(
                "failed to read cache_bufsize for cache at %p",
                b->bc_cache);
            return (WALK_ERR);
        }

        if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
            kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
            return (WALK_NEXT);
    }

    if (i == NCPU)
        mdb_printf("   ");
    else
        mdb_printf("%3d", i);

    mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
        b->bc_timestamp, b->bc_thread);

    return (WALK_NEXT);
}
int
kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    kmem_log_header_t lh;
    kmem_cpu_log_header_t clh;
    uintptr_t lhp, clhp;
    int ncpus;
    uintptr_t *cpu;
    GElf_Sym sym;
    kmem_log_cpu_t *kmc;
    int i;
    kmem_log_data_t kmd;
    uint_t opt_b = FALSE;

    if (mdb_getopts(argc, argv,
        'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
        return (DCMD_USAGE);

    if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
        mdb_warn("failed to read 'kmem_transaction_log'");
        return (DCMD_ERR);
    }

    if (lhp == NULL) {
        mdb_warn("no kmem transaction log\n");
        return (DCMD_ERR);
    }

    mdb_readvar(&ncpus, "ncpus");

    if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
        mdb_warn("failed to read log header at %p", lhp);
        return (DCMD_ERR);
    }

    clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);

    cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);

    if (mdb_lookup_by_name("cpu", &sym) == -1) {
        mdb_warn("couldn't find 'cpu' array");
        return (DCMD_ERR);
    }

    if (sym.st_size != NCPU * sizeof (uintptr_t)) {
        mdb_warn("expected 'cpu' to be of size %d; found %d\n",
            NCPU * sizeof (uintptr_t), sym.st_size);
        return (DCMD_ERR);
    }

    if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
        mdb_warn("failed to read cpu array at %p", sym.st_value);
        return (DCMD_ERR);
    }

    kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
    kmd.kmd_addr = NULL;
    kmd.kmd_cpu = kmc;

    for (i = 0; i < NCPU; i++) {
        if (cpu[i] == NULL)
            continue;

        if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
            mdb_warn("cannot read cpu %d's log header at %p",
                i, clhp);
            return (DCMD_ERR);
        }

        kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
            (uintptr_t)lh.lh_base;
        kmc[i].kmc_high = (uintptr_t)clh.clh_current;

        clhp += sizeof (kmem_cpu_log_header_t);
    }

    mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
        "TIMESTAMP", "THREAD");

    /*
     * If we have been passed an address, print out only log entries
     * corresponding to that address.  If opt_b is specified, then interpret
     * the address as a bufctl.
     */
    if (flags & DCMD_ADDRSPEC) {
        kmem_bufctl_audit_t b;

        if (opt_b) {
            kmd.kmd_addr = addr;
        } else {
            if (mdb_vread(&b,
                sizeof (kmem_bufctl_audit_t), addr) == -1) {
                mdb_warn("failed to read bufctl at %p", addr);
                return (DCMD_ERR);
            }

            (void) kmem_log_walk(addr, &b, &kmd);

            return (DCMD_OK);
        }
    }

    if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
        mdb_warn("can't find kmem log walker");
        return (DCMD_ERR);
    }

    return (DCMD_OK);
}
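
/*
 * Illustrative usage (addresses are hypothetical):
 *
 *	> ::kmem_log			! every transaction log entry
 *	> fffff00012345678::kmem_log	! entries for the buffer containing addr
 *	> fffff000abcdef00::kmem_log -b	! treat addr as a bufctl instead
 */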
typedef struct bufctl_history_cb {
    int		bhc_flags;
    int		bhc_argc;
    const mdb_arg_t	*bhc_argv;
    int		bhc_ret;
} bufctl_history_cb_t;

static int
bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
{
    bufctl_history_cb_t *bhc = arg;

    bhc->bhc_ret =
        bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);

    bhc->bhc_flags &= ~DCMD_LOOPFIRST;

    return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
}

void
bufctl_help(void)
{
    mdb_printf("%s",
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
    mdb_dec_indent(2);
    mdb_printf("%<b>OPTIONS%</b>\n");
    mdb_inc_indent(2);
    mdb_printf("%s",
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n");
}
int
bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    kmem_bufctl_audit_t bc;
    uint_t verbose = FALSE;
    uint_t history = FALSE;
    uint_t in_history = FALSE;
    uintptr_t caller = NULL, thread = NULL;
    uintptr_t laddr, haddr, baddr = NULL;
    hrtime_t earliest = 0, latest = 0;
    int i, depth;
    char c[MDB_SYM_NAMLEN];
    GElf_Sym sym;

    if (mdb_getopts(argc, argv,
        'v', MDB_OPT_SETBITS, TRUE, &verbose,
        'h', MDB_OPT_SETBITS, TRUE, &history,
        'H', MDB_OPT_SETBITS, TRUE, &in_history,	/* internal */
        'c', MDB_OPT_UINTPTR, &caller,
        't', MDB_OPT_UINTPTR, &thread,
        'e', MDB_OPT_UINT64, &earliest,
        'l', MDB_OPT_UINT64, &latest,
        'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
        return (DCMD_USAGE);

    if (!(flags & DCMD_ADDRSPEC))
        return (DCMD_USAGE);

    if (in_history && !history)
        return (DCMD_USAGE);

    if (history && !in_history) {
        mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
            UM_SLEEP | UM_GC);
        bufctl_history_cb_t bhc;

        nargv[0].a_type = MDB_TYPE_STRING;
        nargv[0].a_un.a_str = "-H";	/* prevent recursion */

        for (i = 0; i < argc; i++)
            nargv[i + 1] = argv[i];

        /*
         * When in history mode, we treat each element as if it
         * were in a separate loop, so that the headers group
         * bufctls with similar histories.
         */
        bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
        bhc.bhc_argc = argc + 1;
        bhc.bhc_argv = nargv;
        bhc.bhc_ret = DCMD_OK;

        if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
            addr) == -1) {
            mdb_warn("unable to walk bufctl_history");
            return (DCMD_ERR);
        }

        if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
            mdb_printf("\n");

        return (bhc.bhc_ret);
    }

    if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
        if (verbose) {
            mdb_printf("%16s %16s %16s %16s\n"
                "%<u>%16s %16s %16s %16s%</u>\n",
                "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
                "", "CACHE", "LASTLOG", "CONTENTS");
        } else {
            mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
                "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
        }
    }

    if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
        mdb_warn("couldn't read bufctl at %p", addr);
        return (DCMD_ERR);
    }

    /*
     * Guard against bogus bc_depth in case the bufctl is corrupt or
     * the address does not really refer to a bufctl.
     */
    depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);

    if (caller != NULL) {
        laddr = caller;
        haddr = caller + sizeof (caller);

        if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
            &sym) != -1 && caller == (uintptr_t)sym.st_value) {
            /*
             * We were provided an exact symbol value; any
             * address in the function is valid.
             */
            laddr = (uintptr_t)sym.st_value;
            haddr = (uintptr_t)sym.st_value + sym.st_size;
        }

        for (i = 0; i < depth; i++)
            if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
                break;

        if (i == depth)
            return (DCMD_OK);
    }

    if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
        return (DCMD_OK);

    if (earliest != 0 && bc.bc_timestamp < earliest)
        return (DCMD_OK);

    if (latest != 0 && bc.bc_timestamp > latest)
        return (DCMD_OK);

    if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
        return (DCMD_OK);

    if (flags & DCMD_PIPE_OUT) {
        mdb_printf("%#lr\n", addr);
        return (DCMD_OK);
    }

    if (verbose) {
        mdb_printf(
            "%<b>%16p%</b> %16p %16llx %16p\n"
            "%16s %16p %16p %16p\n",
            addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
            "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);

        mdb_inc_indent(17);
        for (i = 0; i < depth; i++)
            mdb_printf("%a\n", bc.bc_stack[i]);
        mdb_dec_indent(17);
        mdb_printf("\n");
    } else {
        mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
            bc.bc_timestamp, bc.bc_thread);

        for (i = 0; i < depth; i++) {
            if (mdb_lookup_by_addr(bc.bc_stack[i],
                MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
                continue;
            if (strncmp(c, "kmem_", 5) == 0)
                continue;
            mdb_printf(" %a\n", bc.bc_stack[i]);
            break;
        }

        if (i >= depth)
            mdb_printf("\n");
    }

    return (DCMD_OK);
}
typedef struct kmem_verify {
    uint64_t *kmv_buf;		/* buffer to read cache contents into */
    size_t kmv_size;		/* number of bytes in kmv_buf */
    int kmv_corruption;		/* > 0 if corruption found. */
    int kmv_besilent;		/* report actual corruption sites */
    struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;

/*
 * verify that buf is filled with the pattern pat.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
    uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
    uint64_t *buf;

    for (buf = buf_arg; buf < bufend; buf++)
        if (*buf != pat)
            return ((uintptr_t)buf - (uintptr_t)buf_arg);

    return (-1);
}

/*
 * verify that btp->bt_bxstat == (bcp ^ pat)
 */
static int
verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
{
    return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
}

/*
 * verify the integrity of a free block of memory by checking
 * that it is filled with 0xdeadbeef and that its buftag is sane.
 */
static int
verify_free(uintptr_t addr, const void *data, void *private)
{
    kmem_verify_t *kmv = (kmem_verify_t *)private;
    uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
    int64_t corrupt;		/* corruption offset */
    kmem_buftag_t *buftagp;		/* ptr to buftag */
    kmem_cache_t *cp = &kmv->kmv_cache;
    int besilent = kmv->kmv_besilent;

    buftagp = KMEM_BUFTAG(cp, buf);

    /*
     * Read the buffer to check.
     */
    if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
        if (!besilent)
            mdb_warn("couldn't read %p", addr);
        return (WALK_NEXT);
    }

    if ((corrupt = verify_pattern(buf, cp->cache_verify,
        KMEM_FREE_PATTERN)) >= 0) {
        if (!besilent)
            mdb_printf("buffer %p (free) seems corrupted, at %p\n",
                addr, (uintptr_t)addr + corrupt);
        goto corrupt;
    }

    /*
     * When KMF_LITE is set, buftagp->bt_redzone is used to hold
     * the first bytes of the buffer, hence we cannot check for red
     * zone corruption.
     */
    if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
        buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
        if (!besilent)
            mdb_printf("buffer %p (free) seems to "
                "have a corrupt redzone pattern\n", addr);
        goto corrupt;
    }

    /*
     * confirm bufctl pointer integrity.
     */
    if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
        if (!besilent)
            mdb_printf("buffer %p (free) has a corrupt "
                "buftag\n", addr);
        goto corrupt;
    }

    return (WALK_NEXT);
corrupt:
    kmv->kmv_corruption++;
    return (WALK_NEXT);
}
/*
 * Verify that the buftag of an allocated buffer makes sense with respect
 * to the buffer.
 */
static int
verify_alloc(uintptr_t addr, const void *data, void *private)
{
    kmem_verify_t *kmv = (kmem_verify_t *)private;
    kmem_cache_t *cp = &kmv->kmv_cache;
    uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
    kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
    uint32_t *ip = (uint32_t *)buftagp;
    uint8_t *bp = (uint8_t *)buf;
    int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
    int besilent = kmv->kmv_besilent;

    /*
     * Read the buffer to check.
     */
    if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
        if (!besilent)
            mdb_warn("couldn't read %p", addr);
        return (WALK_NEXT);
    }

    /*
     * There are two cases to handle:
     * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
     *    0xfeedfacefeedface at the end of it
     * 2. If the buf was alloc'd using kmem_alloc, it will have
     *    0xbb just past the end of the region in use.  At the buftag,
     *    it will have 0xfeedface (or, if the whole buffer is in use,
     *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
     *    endianness), followed by 32 bits containing the offset of the
     *    0xbb byte in the buffer.
     *
     * Finally, the two 32-bit words that comprise the second half of the
     * buftag should xor to KMEM_BUFTAG_ALLOC
     */
    if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
        looks_ok = 1;
    else if (!KMEM_SIZE_VALID(ip[1]))
        size_ok = 0;
    else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
        looks_ok = 1;
    else
        size_ok = 0;

    if (!size_ok) {
        if (!besilent)
            mdb_printf("buffer %p (allocated) has a corrupt "
                "redzone size encoding\n", addr);
        goto corrupt;
    }

    if (!looks_ok) {
        if (!besilent)
            mdb_printf("buffer %p (allocated) has a corrupt "
                "redzone signature\n", addr);
        goto corrupt;
    }

    if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
        if (!besilent)
            mdb_printf("buffer %p (allocated) has a "
                "corrupt buftag\n", addr);
        goto corrupt;
    }

    return (WALK_NEXT);
corrupt:
    kmv->kmv_corruption++;
    return (WALK_NEXT);
}
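
/*
 * Worked example of case 2 above (hypothetical sizes): for a
 * kmem_alloc(27, KM_SLEEP) buffer satisfied from a 32-byte cache with
 * KMF_REDZONE set, ip[1] decodes (via KMEM_SIZE_DECODE) back to 27 and
 * bp[27] is expected to hold KMEM_REDZONE_BYTE (0xbb); the second half of
 * the buftag must still xor to KMEM_BUFTAG_ALLOC.
 */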
int
kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    if (flags & DCMD_ADDRSPEC) {
        int check_alloc = 0, check_free = 0;
        kmem_verify_t kmv;

        if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
            addr) == -1) {
            mdb_warn("couldn't read kmem_cache %p", addr);
            return (DCMD_ERR);
        }

        kmv.kmv_size = kmv.kmv_cache.cache_buftag +
            sizeof (kmem_buftag_t);
        kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
        kmv.kmv_corruption = 0;

        if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
            check_alloc = 1;
            if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
                check_free = 1;
        } else {
            if (!(flags & DCMD_LOOP)) {
                mdb_warn("cache %p (%s) does not have "
                    "redzone checking enabled\n", addr,
                    kmv.kmv_cache.cache_name);
            }
            return (DCMD_ERR);
        }

        if (flags & DCMD_LOOP) {
            /*
             * table mode, don't print out every corrupt buffer
             */
            kmv.kmv_besilent = 1;
        } else {
            mdb_printf("Summary for cache '%s'\n",
                kmv.kmv_cache.cache_name);
            mdb_inc_indent(2);
            kmv.kmv_besilent = 0;
        }

        if (check_alloc)
            (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
        if (check_free)
            (void) mdb_pwalk("freemem", verify_free, &kmv, addr);

        if (flags & DCMD_LOOP) {
            if (kmv.kmv_corruption == 0) {
                mdb_printf("%-*s %?p clean\n",
                    KMEM_CACHE_NAMELEN,
                    kmv.kmv_cache.cache_name, addr);
            } else {
                char *s = "";	/* optional s in "buffer[s]" */
                if (kmv.kmv_corruption > 1)
                    s = "s";

                mdb_printf("%-*s %?p %d corrupt buffer%s\n",
                    KMEM_CACHE_NAMELEN,
                    kmv.kmv_cache.cache_name, addr,
                    kmv.kmv_corruption, s);
            }
        } else {
            /*
             * This is the more verbose mode, when the user has
             * typed addr::kmem_verify.  If the cache was clean,
             * nothing will have yet been printed. So say something.
             */
            if (kmv.kmv_corruption == 0)
                mdb_printf("clean\n");

            mdb_dec_indent(2);
        }
    } else {
        /*
         * If the user didn't specify a cache to verify, we'll walk all
         * kmem_cache's, specifying ourself as a callback for each...
         * this is the equivalent of '::walk kmem_cache .::kmem_verify'
         */
        mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN,
            "Cache Name", "Addr", "Cache Integrity");
        (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
    }

    return (DCMD_OK);
}
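
/*
 * Illustrative output (hypothetical addresses):
 *
 *	> ::kmem_verify
 *	Cache Name                    Addr             Cache Integrity
 *	kmem_alloc_8                  fffff00001010008 clean
 *	kmem_alloc_64                 fffff00001014008 1 corrupt buffer
 *
 *	> fffff00001014008::kmem_verify
 *	Summary for cache 'kmem_alloc_64'
 *	buffer fffff0000abc1040 (allocated) has a corrupt redzone signature
 */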
typedef struct vmem_node {
    struct vmem_node *vn_next;
    struct vmem_node *vn_parent;
    struct vmem_node *vn_sibling;
    struct vmem_node *vn_children;
    uintptr_t vn_addr;
    int vn_marked;
    vmem_t vn_vmem;
} vmem_node_t;

typedef struct vmem_walk {
    vmem_node_t *vw_root;
    vmem_node_t *vw_current;
} vmem_walk_t;

int
vmem_walk_init(mdb_walk_state_t *wsp)
{
    uintptr_t vaddr, paddr;
    vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
    vmem_walk_t *vw;

    if (mdb_readvar(&vaddr, "vmem_list") == -1) {
        mdb_warn("couldn't read 'vmem_list'");
        return (WALK_ERR);
    }

    while (vaddr != NULL) {
        vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
        vp->vn_addr = vaddr;
        vp->vn_next = head;
        head = vp;

        if (vaddr == wsp->walk_addr)
            current = vp;

        if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
            mdb_warn("couldn't read vmem_t at %p", vaddr);
            goto err;
        }

        vaddr = (uintptr_t)vp->vn_vmem.vm_next;
    }

    for (vp = head; vp != NULL; vp = vp->vn_next) {

        if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
            vp->vn_sibling = root;
            root = vp;
            continue;
        }

        for (parent = head; parent != NULL; parent = parent->vn_next) {
            if (parent->vn_addr != paddr)
                continue;
            vp->vn_sibling = parent->vn_children;
            parent->vn_children = vp;
            vp->vn_parent = parent;
            break;
        }

        if (parent == NULL) {
            mdb_warn("couldn't find %p's parent (%p)\n",
                vp->vn_addr, paddr);
            goto err;
        }
    }

    vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
    vw->vw_root = root;

    if (current != NULL)
        vw->vw_current = current;
    else
        vw->vw_current = root;

    wsp->walk_data = vw;
    return (WALK_NEXT);
err:
    for (vp = head; head != NULL; vp = head) {
        head = vp->vn_next;
        mdb_free(vp, sizeof (vmem_node_t));
    }

    return (WALK_ERR);
}
int
vmem_walk_step(mdb_walk_state_t *wsp)
{
    vmem_walk_t *vw = wsp->walk_data;
    vmem_node_t *vp;
    int rval;

    if ((vp = vw->vw_current) == NULL)
        return (WALK_DONE);

    rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);

    if (vp->vn_children != NULL) {
        vw->vw_current = vp->vn_children;
        return (rval);
    }

    do {
        vw->vw_current = vp->vn_sibling;
        vp = vp->vn_parent;
    } while (vw->vw_current == NULL && vp != NULL);

    return (rval);
}
3388 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3389 * children are visited before their parent. We perform the postfix walk
3390 * iteratively (rather than recursively) to allow mdb to regain control
3391 * after each callback.
3394 vmem_postfix_walk_step(mdb_walk_state_t
*wsp
)
3396 vmem_walk_t
*vw
= wsp
->walk_data
;
3397 vmem_node_t
*vp
= vw
->vw_current
;
3401 * If this node is marked, then we know that we have already visited
3402 * all of its children. If the node has any siblings, they need to
3403 * be visited next; otherwise, we need to visit the parent. Note
3404 * that vp->vn_marked will only be zero on the first invocation of
3405 * the step function.
3407 if (vp
->vn_marked
) {
3408 if (vp
->vn_sibling
!= NULL
)
3409 vp
= vp
->vn_sibling
;
3410 else if (vp
->vn_parent
!= NULL
)
3414 * We have neither a parent, nor a sibling, and we
3415 * have already been visited; we're done.
3422 * Before we visit this node, visit its children.
3424 while (vp
->vn_children
!= NULL
&& !vp
->vn_children
->vn_marked
)
3425 vp
= vp
->vn_children
;
3428 vw
->vw_current
= vp
;
3429 rval
= wsp
->walk_callback(vp
->vn_addr
, &vp
->vn_vmem
, wsp
->walk_cbdata
);
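
/*
 * For example, for an arena A with children B and C (and B itself having a
 * child D), the postfix order is D, B, C, A: every arena is visited only
 * after all of the arenas that import from it.
 */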
void
vmem_walk_fini(mdb_walk_state_t *wsp)
{
    vmem_walk_t *vw = wsp->walk_data;
    vmem_node_t *root = vw->vw_root;
    int done;

    if (root == NULL)
        return;

    if ((vw->vw_root = root->vn_children) != NULL)
        vmem_walk_fini(wsp);

    vw->vw_root = root->vn_sibling;
    done = (root->vn_sibling == NULL && root->vn_parent == NULL);
    mdb_free(root, sizeof (vmem_node_t));

    if (done) {
        mdb_free(vw, sizeof (vmem_walk_t));
    } else {
        vmem_walk_fini(wsp);
    }
}
typedef struct vmem_seg_walk {
    uint8_t vsw_type;
    uintptr_t vsw_start;
    uintptr_t vsw_current;
} vmem_seg_walk_t;

int
vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
{
    vmem_seg_walk_t *vsw;

    if (wsp->walk_addr == NULL) {
        mdb_warn("vmem_%s does not support global walks\n", name);
        return (WALK_ERR);
    }

    wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);

    vsw->vsw_type = type;
    vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
    vsw->vsw_current = vsw->vsw_start;

    return (WALK_NEXT);
}

/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
 */
#define	VMEM_NONE	0

int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
    return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}

int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
    return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}

int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
    return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}

int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
    return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
int
vmem_seg_walk_step(mdb_walk_state_t *wsp)
{
    vmem_seg_t seg;
    vmem_seg_walk_t *vsw = wsp->walk_data;
    uintptr_t addr = vsw->vsw_current;
    static size_t seg_size = 0;
    int rval;

    if (!seg_size) {
        if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
            mdb_warn("failed to read 'vmem_seg_size'");
            seg_size = sizeof (vmem_seg_t);
        }
    }

    if (seg_size < sizeof (seg))
        bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);

    if (mdb_vread(&seg, seg_size, addr) == -1) {
        mdb_warn("couldn't read vmem_seg at %p", addr);
        return (WALK_ERR);
    }

    vsw->vsw_current = (uintptr_t)seg.vs_anext;
    if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
        rval = WALK_NEXT;
    } else {
        rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
    }

    if (vsw->vsw_current == vsw->vsw_start)
        return (WALK_DONE);

    return (rval);
}

void
vmem_seg_walk_fini(mdb_walk_state_t *wsp)
{
    vmem_seg_walk_t *vsw = wsp->walk_data;

    mdb_free(vsw, sizeof (vmem_seg_walk_t));
}
#define	VMEM_NAMEWIDTH	22

int
vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    vmem_t v, parent;
    vmem_kstat_t *vkp = &v.vm_kstat;
    uintptr_t paddr;
    int ident = 0;
    char c[VMEM_NAMEWIDTH];

    if (!(flags & DCMD_ADDRSPEC)) {
        if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
            mdb_warn("can't walk vmem");
            return (DCMD_ERR);
        }
        return (DCMD_OK);
    }

    if (DCMD_HDRSPEC(flags))
        mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
            "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
            "TOTAL", "SUCCEED", "FAIL");

    if (mdb_vread(&v, sizeof (v), addr) == -1) {
        mdb_warn("couldn't read vmem at %p", addr);
        return (DCMD_ERR);
    }

    for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
        if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
            mdb_warn("couldn't trace %p's ancestry", addr);
            ident = 0;
            break;
        }
        paddr = (uintptr_t)parent.vm_source;
    }

    (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);

    mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
        addr, VMEM_NAMEWIDTH, c,
        vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
        vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);

    return (DCMD_OK);
}
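
/*
 * Illustrative output (hypothetical numbers); each arena is indented two
 * spaces per level under its vm_source:
 *
 *	ADDR             NAME                    INUSE        TOTAL   SUCCEED  FAIL
 *	fffff00001000000 heap               1234567890  99999999999  12345678     0
 *	fffff00001000800   kmem_va             1234567    123456789   1234567     0
 */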
3610 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3612 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3613 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3616 mdb_printf("%<b>OPTIONS%</b>\n");
3619 " -v Display the full content of the vmem_seg, including its stack trace\n"
3620 " -s report the size of the segment, instead of the end address\n"
3622 " filter out segments without the function/PC in their stack trace\n"
3624 " filter out segments timestamped before earliest\n"
3626 " filter out segments timestamped after latest\n"
3628 " filer out segments smaller than minsize\n"
3630 " filer out segments larger than maxsize\n"
3632 " filter out segments not involving thread\n"
3634 " filter out segments not of type 'type'\n"
3635 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
int
vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    vmem_seg_t vs;
    pc_t *stk = vs.vs_stack;
    uintptr_t sz;
    uint8_t t;
    const char *type = NULL;
    GElf_Sym sym;
    char c[MDB_SYM_NAMLEN];
    int no_debug;
    int i;
    int depth;
    uintptr_t laddr, haddr;

    uintptr_t caller = NULL, thread = NULL;
    uintptr_t minsize = 0, maxsize = 0;

    hrtime_t earliest = 0, latest = 0;

    uint_t size = 0;
    uint_t verbose = 0;

    if (!(flags & DCMD_ADDRSPEC))
        return (DCMD_USAGE);

    if (mdb_getopts(argc, argv,
        'c', MDB_OPT_UINTPTR, &caller,
        'e', MDB_OPT_UINT64, &earliest,
        'l', MDB_OPT_UINT64, &latest,
        's', MDB_OPT_SETBITS, TRUE, &size,
        'm', MDB_OPT_UINTPTR, &minsize,
        'M', MDB_OPT_UINTPTR, &maxsize,
        't', MDB_OPT_UINTPTR, &thread,
        'T', MDB_OPT_STR, &type,
        'v', MDB_OPT_SETBITS, TRUE, &verbose,
        NULL) != argc)
        return (DCMD_USAGE);

    if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
        if (verbose) {
            mdb_printf("%16s %4s %16s %16s %16s\n"
                "%<u>%16s %4s %16s %16s %16s%</u>\n",
                "ADDR", "TYPE", "START", "END", "SIZE",
                "", "", "THREAD", "TIMESTAMP", "");
        } else {
            mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
                "START", size ? "SIZE" : "END", "WHO");
        }
    }

    if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
        mdb_warn("couldn't read vmem_seg at %p", addr);
        return (DCMD_ERR);
    }

    if (type != NULL) {
        if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
            t = VMEM_ALLOC;
        else if (strcmp(type, "FREE") == 0)
            t = VMEM_FREE;
        else if (strcmp(type, "SPAN") == 0)
            t = VMEM_SPAN;
        else if (strcmp(type, "ROTR") == 0 ||
            strcmp(type, "ROTOR") == 0)
            t = VMEM_ROTOR;
        else if (strcmp(type, "WLKR") == 0 ||
            strcmp(type, "WALKER") == 0)
            t = VMEM_WALKER;
        else {
            mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
                type);
            return (DCMD_ERR);
        }

        if (vs.vs_type != t)
            return (DCMD_OK);
    }

    sz = vs.vs_end - vs.vs_start;

    if (minsize != 0 && sz < minsize)
        return (DCMD_OK);

    if (maxsize != 0 && sz > maxsize)
        return (DCMD_OK);

    t = vs.vs_type;
    depth = vs.vs_depth;

    /*
     * debug info, when present, is only accurate for VMEM_ALLOC segments
     */
    no_debug = (t != VMEM_ALLOC) ||
        (depth == 0 || depth > VMEM_STACK_DEPTH);

    if (no_debug) {
        if (caller != NULL || thread != NULL || earliest != 0 ||
            latest != 0)
            return (DCMD_OK);	/* not enough info */
    } else {
        if (caller != NULL) {
            laddr = caller;
            haddr = caller + sizeof (caller);

            if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
                sizeof (c), &sym) != -1 &&
                caller == (uintptr_t)sym.st_value) {
                /*
                 * We were provided an exact symbol value; any
                 * address in the function is valid.
                 */
                laddr = (uintptr_t)sym.st_value;
                haddr = (uintptr_t)sym.st_value + sym.st_size;
            }

            for (i = 0; i < depth; i++)
                if (vs.vs_stack[i] >= laddr &&
                    vs.vs_stack[i] < haddr)
                    break;

            if (i == depth)
                return (DCMD_OK);
        }

        if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
            return (DCMD_OK);

        if (earliest != 0 && vs.vs_timestamp < earliest)
            return (DCMD_OK);

        if (latest != 0 && vs.vs_timestamp > latest)
            return (DCMD_OK);
    }

    type = (t == VMEM_ALLOC ? "ALLC" :
        t == VMEM_FREE ? "FREE" :
        t == VMEM_SPAN ? "SPAN" :
        t == VMEM_ROTOR ? "ROTR" :
        t == VMEM_WALKER ? "WLKR" :
        "????");

    if (flags & DCMD_PIPE_OUT) {
        mdb_printf("%#lr\n", addr);
        return (DCMD_OK);
    }

    if (verbose) {
        mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
            addr, type, vs.vs_start, vs.vs_end, sz);

        if (no_debug)
            return (DCMD_OK);

        mdb_printf("%16s %4s %16p %16llx\n",
            "", "", vs.vs_thread, vs.vs_timestamp);

        mdb_inc_indent(17);
        for (i = 0; i < depth; i++) {
            mdb_printf("%a\n", stk[i]);
        }
        mdb_dec_indent(17);
        mdb_printf("\n");
    } else {
        mdb_printf("%0?p %4s %0?p %0?p", addr, type,
            vs.vs_start, size ? sz : vs.vs_end);

        if (no_debug) {
            mdb_printf("\n");
            return (DCMD_OK);
        }

        for (i = 0; i < depth; i++) {
            if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
                c, sizeof (c), &sym) == -1)
                continue;
            if (strncmp(c, "vmem_", 5) == 0)
                continue;
            break;
        }
        mdb_printf(" %a\n", stk[i]);
    }

    return (DCMD_OK);
}
typedef struct kmalog_data {
    uintptr_t	kma_addr;
    hrtime_t	kma_newest;
} kmalog_data_t;

static int
showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
{
    char name[KMEM_CACHE_NAMELEN + 1];
    hrtime_t delta;
    int i, depth;
    size_t bufsize;

    if (bcp->bc_timestamp == 0)
        return (WALK_DONE);

    if (kma->kma_newest == 0)
        kma->kma_newest = bcp->bc_timestamp;

    if (kma->kma_addr) {
        if (mdb_vread(&bufsize, sizeof (bufsize),
            (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
            mdb_warn(
                "failed to read cache_bufsize for cache at %p",
                bcp->bc_cache);
            return (WALK_ERR);
        }

        if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
            kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
            return (WALK_NEXT);
    }

    delta = kma->kma_newest - bcp->bc_timestamp;
    depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);

    if (mdb_readstr(name, sizeof (name), (uintptr_t)
        &bcp->bc_cache->cache_name) <= 0)
        (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);

    mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
        delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);

    for (i = 0; i < depth; i++)
        mdb_printf("\t %a\n", bcp->bc_stack[i]);

    return (WALK_NEXT);
}

int
kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    const char *logname = "kmem_transaction_log";
    kmalog_data_t kma;

    if (argc > 1)
        return (DCMD_USAGE);

    kma.kma_newest = 0;
    if (flags & DCMD_ADDRSPEC)
        kma.kma_addr = addr;
    else
        kma.kma_addr = NULL;

    if (argc > 0) {
        if (argv->a_type != MDB_TYPE_STRING)
            return (DCMD_USAGE);
        if (strcmp(argv->a_un.a_str, "fail") == 0)
            logname = "kmem_failure_log";
        else if (strcmp(argv->a_un.a_str, "slab") == 0)
            logname = "kmem_slab_log";
        else
            return (DCMD_USAGE);
    }

    if (mdb_readvar(&addr, logname) == -1) {
        mdb_warn("failed to read %s log header pointer", logname);
        return (DCMD_ERR);
    }

    if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
        mdb_warn("failed to walk kmem log");
        return (DCMD_ERR);
    }

    return (DCMD_OK);
}
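
/*
 * Illustrative usage: "::kmalog" shows transaction-log entries with
 * timestamps relative to the newest entry (T-sec.nsec); "::kmalog fail" and
 * "::kmalog slab" select the allocation-failure and slab logs instead, and
 * "<addr>::kmalog" (hypothetical addr) restricts output to entries whose
 * buffer contains addr.
 */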
/*
 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
 * The first piece is a structure which we use to accumulate kmem_cache_t
 * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
 * walker; we either add all caches, or ones named explicitly as arguments.
 */

typedef struct kmclist {
    const char *kmc_name;		/* Name to match (or NULL) */
    uintptr_t *kmc_caches;		/* List of kmem_cache_t addrs */
    int kmc_nelems;			/* Num entries in kmc_caches */
    int kmc_size;			/* Size of kmc_caches array */
} kmclist_t;

static int
kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
{
    void *p;
    int s;

    if (kmc->kmc_name == NULL ||
        strcmp(cp->cache_name, kmc->kmc_name) == 0) {
        /*
         * If we have a match, grow our array (if necessary), and then
         * add the virtual address of the matching cache to our list.
         */
        if (kmc->kmc_nelems >= kmc->kmc_size) {
            s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
            p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);

            bcopy(kmc->kmc_caches, p,
                sizeof (uintptr_t) * kmc->kmc_size);

            kmc->kmc_caches = p;
            kmc->kmc_size = s;
        }

        kmc->kmc_caches[kmc->kmc_nelems++] = addr;
        return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
    }

    return (WALK_NEXT);
}
/*
 * The second piece of ::kmausers is a hash table of allocations.  Each
 * allocation owner is identified by its stack trace and data_size.  We then
 * track the total bytes of all such allocations, and the number of allocations
 * to report at the end.  Once we have a list of caches, we walk through the
 * allocated bufctls of each, and update our hash table accordingly.
 */

typedef struct kmowner {
    struct kmowner *kmo_head;		/* First hash elt in bucket */
    struct kmowner *kmo_next;		/* Next hash elt in chain */
    size_t kmo_signature;		/* Hash table signature */
    uint_t kmo_num;			/* Number of allocations */
    size_t kmo_data_size;		/* Size of each allocation */
    size_t kmo_total_size;		/* Total bytes of allocation */
    int kmo_depth;			/* Depth of stack trace */
    uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
} kmowner_t;

typedef struct kmusers {
    uintptr_t kmu_addr;			/* address of interest */
    const kmem_cache_t *kmu_cache;	/* Current kmem cache */
    kmowner_t *kmu_hash;		/* Hash table of owners */
    int kmu_nelems;			/* Number of entries in use */
    int kmu_size;			/* Total number of entries */
} kmusers_t;

static void
kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
    size_t size, size_t data_size)
{
    int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
    size_t bucket, signature = data_size;
    kmowner_t *kmo, *kmoend;

    /*
     * If the hash table is full, double its size and rehash everything.
     */
    if (kmu->kmu_nelems >= kmu->kmu_size) {
        int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;

        kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
        bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
        kmu->kmu_hash = kmo;
        kmu->kmu_size = s;

        kmoend = kmu->kmu_hash + kmu->kmu_size;
        for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
            kmo->kmo_head = NULL;

        kmoend = kmu->kmu_hash + kmu->kmu_nelems;
        for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
            bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
            kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
            kmu->kmu_hash[bucket].kmo_head = kmo;
        }
    }

    /*
     * Finish computing the hash signature from the stack trace, and then
     * see if the owner is in the hash table.  If so, update our stats.
     */
    for (i = 0; i < depth; i++)
        signature += bcp->bc_stack[i];

    bucket = signature & (kmu->kmu_size - 1);

    for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
        if (kmo->kmo_signature == signature) {
            size_t difference = 0;

            difference |= kmo->kmo_data_size - data_size;
            difference |= kmo->kmo_depth - depth;

            for (i = 0; i < depth; i++) {
                difference |= kmo->kmo_stack[i] -
                    bcp->bc_stack[i];
            }

            if (difference == 0) {
                kmo->kmo_total_size += size;
                kmo->kmo_num++;
                return;
            }
        }
    }

    /*
     * If the owner is not yet hashed, grab the next element and fill it
     * in based on the allocation information.
     */
    kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
    kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
    kmu->kmu_hash[bucket].kmo_head = kmo;

    kmo->kmo_signature = signature;
    kmo->kmo_num = 1;
    kmo->kmo_data_size = data_size;
    kmo->kmo_total_size = size;
    kmo->kmo_depth = depth;

    for (i = 0; i < depth; i++)
        kmo->kmo_stack[i] = bcp->bc_stack[i];
}
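
/*
 * In other words, the grouping key is (data_size, stack trace): the
 * signature is only a hash hint, and the loop above still compares the
 * size, depth, and every stack frame before folding an allocation into an
 * existing kmowner_t (bumping kmo_num and kmo_total_size).
 */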
/*
 * When ::kmausers is invoked without the -f flag, we simply update our hash
 * table with the information from each allocated bufctl.
 */
static int
kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
{
    const kmem_cache_t *cp = kmu->kmu_cache;

    kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
    return (WALK_NEXT);
}

/*
 * When ::kmausers is invoked with the -f flag, we print out the information
 * for each bufctl as well as updating the hash table.
 */
static int
kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
{
    int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
    const kmem_cache_t *cp = kmu->kmu_cache;
    kmem_bufctl_t bufctl;

    if (kmu->kmu_addr) {
        if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
            mdb_warn("couldn't read bufctl at %p", addr);
        else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
            kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
            cp->cache_bufsize)
            return (WALK_NEXT);
    }

    mdb_printf("size %d, addr %p, thread %p, cache %s\n",
        cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);

    for (i = 0; i < depth; i++)
        mdb_printf("\t %a\n", bcp->bc_stack[i]);

    kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
    return (WALK_NEXT);
}

/*
 * We sort our results by allocation size before printing them.
 */
static int
kmownercmp(const void *lp, const void *rp)
{
    const kmowner_t *lhs = lp;
    const kmowner_t *rhs = rp;

    return (rhs->kmo_total_size - lhs->kmo_total_size);
}
/*
 * The main engine of ::kmausers is relatively straightforward: First we
 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
 * iterate over the allocated bufctls of each cache in the list.  Finally,
 * we sort and print our results.
 */
int
kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
    int mem_threshold = 8192;	/* Minimum # bytes for printing */
    int cnt_threshold = 100;	/* Minimum # blocks for printing */
    int audited_caches = 0;	/* Number of KMF_AUDIT caches found */
    int do_all_caches = 1;	/* Do all caches (no arguments) */
    int opt_e = FALSE;		/* Include "small" users */
    int opt_f = FALSE;		/* Print stack traces */

    mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
    kmowner_t *kmo, *kmoend;
    int i, oelems;

    kmclist_t kmc;
    kmusers_t kmu;

    bzero(&kmc, sizeof (kmc));
    bzero(&kmu, sizeof (kmu));

    while ((i = mdb_getopts(argc, argv,
        'e', MDB_OPT_SETBITS, TRUE, &opt_e,
        'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

        argv += i;	/* skip past options we just processed */
        argc -= i;	/* adjust argc */

        if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
            return (DCMD_USAGE);

        oelems = kmc.kmc_nelems;
        kmc.kmc_name = argv->a_un.a_str;
        (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);

        if (kmc.kmc_nelems == oelems) {
            mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
            return (DCMD_ERR);
        }

        do_all_caches = 0;
        argv++;
        argc--;
    }

    if (flags & DCMD_ADDRSPEC) {
        opt_f = TRUE;
        kmu.kmu_addr = addr;
    } else {
        kmu.kmu_addr = NULL;
    }

    if (opt_e)
        mem_threshold = cnt_threshold = 0;

    if (opt_f)
        callback = (mdb_walk_cb_t)kmause2;

    if (do_all_caches) {
        kmc.kmc_name = NULL; /* match all cache names */
        (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
    }

    for (i = 0; i < kmc.kmc_nelems; i++) {
        uintptr_t cp = kmc.kmc_caches[i];
        kmem_cache_t c;

        if (mdb_vread(&c, sizeof (c), cp) == -1) {
            mdb_warn("failed to read cache at %p", cp);
            continue;
        }

        if (!(c.cache_flags & KMF_AUDIT)) {
            if (!do_all_caches) {
                mdb_warn("KMF_AUDIT is not enabled for %s\n",
                    c.cache_name);
            }
            continue;
        }

        kmu.kmu_cache = &c;
        (void) mdb_pwalk("bufctl", callback, &kmu, cp);
        audited_caches++;
    }

    if (audited_caches == 0 && do_all_caches) {
        mdb_warn("KMF_AUDIT is not enabled for any caches\n");
        return (DCMD_ERR);
    }

    qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
    kmoend = kmu.kmu_hash + kmu.kmu_nelems;

    for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
        if (kmo->kmo_total_size < mem_threshold &&
            kmo->kmo_num < cnt_threshold)
            continue;
        mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
            kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
        for (i = 0; i < kmo->kmo_depth; i++)
            mdb_printf("\t %a\n", kmo->kmo_stack[i]);
    }

    return (DCMD_OK);
}

void
kmausers_help(void)
{
    mdb_printf(
        "Displays the largest users of the kmem allocator, sorted by \n"
        "trace.  If one or more caches is specified, only those caches\n"
        "will be searched.  By default, all caches are searched.  If an\n"
        "address is specified, then only those allocations which include\n"
        "the given address are displayed.  Specifying an address implies\n"
        "-f.\n"
        "\n"
        "\t-e\tInclude all users, not just the largest\n"
        "\t-f\tDisplay individual allocations.  By default, users are\n"
        "\t\tgrouped by stack\n");
}
static int
kmem_ready_check(void)
{
    int ready;

    if (mdb_readvar(&ready, "kmem_ready") < 0)
        return (-1); /* errno is set for us */

    return (ready);
}

void
kmem_statechange(void)
{
    static int been_ready = 0;

    if (been_ready)
        return;

    if (kmem_ready_check() <= 0)
        return;

    been_ready = 1;
    (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
}

void
kmem_init(void)
{
    mdb_walker_t w = {
        "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
        list_walk_step, list_walk_fini
    };

    /*
     * If kmem is ready, we'll need to invoke the kmem_cache walker
     * immediately.  Walkers in the linkage structure won't be ready until
     * _mdb_init returns, so we'll need to add this one manually.  If kmem
     * is ready, we'll use the walker to initialize the caches.  If kmem
     * isn't ready, we'll register a callback that will allow us to defer
     * cache walking until it is.
     */
    if (mdb_add_walker(&w) != 0) {
        mdb_warn("failed to add kmem_cache walker");
        return;
    }

    kmem_statechange();

    /* register our ::whatis handlers */
    mdb_whatis_register("modules", whatis_run_modules, NULL,
        WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
    mdb_whatis_register("threads", whatis_run_threads, NULL,
        WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
    mdb_whatis_register("pages", whatis_run_pages, NULL,
        WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
    mdb_whatis_register("kmem", whatis_run_kmem, NULL,
        WHATIS_PRIO_ALLOCATOR, 0);
    mdb_whatis_register("vmem", whatis_run_vmem, NULL,
        WHATIS_PRIO_ALLOCATOR, 0);
}
4308 typedef struct whatthread
{
4309 uintptr_t wt_target
;
4314 whatthread_walk_thread(uintptr_t addr
, const kthread_t
*t
, whatthread_t
*w
)
4316 uintptr_t current
, data
;
4318 if (t
->t_stkbase
== NULL
)
4322 * Warn about swapped out threads, but drive on anyway
4324 if (!(t
->t_schedflag
& TS_LOAD
)) {
4325 mdb_warn("thread %p's stack swapped out\n", addr
);
4330 * Search the thread's stack for the given pointer. Note that it would
4331 * be more efficient to follow ::kgrep's lead and read in page-sized
4332 * chunks, but this routine is already fast and simple.
4334 for (current
= (uintptr_t)t
->t_stkbase
; current
< (uintptr_t)t
->t_stk
;
4335 current
+= sizeof (uintptr_t)) {
4336 if (mdb_vread(&data
, sizeof (data
), current
) == -1) {
4337 mdb_warn("couldn't read thread %p's stack at %p",
4342 if (data
== w
->wt_target
) {
4343 if (w
->wt_verbose
) {
4344 mdb_printf("%p in thread %p's stack%s\n",
4345 current
, addr
, stack_active(t
, current
));
4347 mdb_printf("%#lr\n", addr
);
4357 whatthread(uintptr_t addr
, uint_t flags
, int argc
, const mdb_arg_t
*argv
)
4361 if (!(flags
& DCMD_ADDRSPEC
))
4362 return (DCMD_USAGE
);
4364 w
.wt_verbose
= FALSE
;
4367 if (mdb_getopts(argc
, argv
,
4368 'v', MDB_OPT_SETBITS
, TRUE
, &w
.wt_verbose
, NULL
) != argc
)
4369 return (DCMD_USAGE
);
4371 if (mdb_walk("thread", (mdb_walk_cb_t
)whatthread_walk_thread
, &w
)
4373 mdb_warn("couldn't walk threads");