1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright 2011 Joyent, Inc. All rights reserved.
30 #include <mdb/mdb_param.h>
31 #include <mdb/mdb_modapi.h>
32 #include <mdb/mdb_ctf.h>
33 #include <mdb/mdb_whatis.h>
34 #include <sys/cpuvar.h>
35 #include <sys/kmem_impl.h>
36 #include <sys/vmem_impl.h>
37 #include <sys/machelf.h>
38 #include <sys/modctl.h>
39 #include <sys/kobj.h>
40 #include <sys/panic.h>
41 #include <sys/stack.h>
42 #include <sys/sysmacros.h>
43 #include <vm/page.h>
45 #include "avl.h"
46 #include "combined.h"
47 #include "dist.h"
48 #include "kmem.h"
49 #include "list.h"
51 #define dprintf(x) if (mdb_debug_level) { \
52 mdb_printf("kmem debug: "); \
53 /*CSTYLED*/\
54 mdb_printf x ;\
57 #define KM_ALLOCATED 0x01
58 #define KM_FREE 0x02
59 #define KM_BUFCTL 0x04
60 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
61 #define KM_HASH 0x10
63 static int mdb_debug_level = 0;
65 /*ARGSUSED*/
66 static int
67 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
69 mdb_walker_t w;
70 char descr[64];
72 (void) mdb_snprintf(descr, sizeof (descr),
73 "walk the %s cache", c->cache_name);
75 w.walk_name = c->cache_name;
76 w.walk_descr = descr;
77 w.walk_init = kmem_walk_init;
78 w.walk_step = kmem_walk_step;
79 w.walk_fini = kmem_walk_fini;
80 w.walk_init_arg = (void *)addr;
82 if (mdb_add_walker(&w) == -1)
83 mdb_warn("failed to add %s walker", c->cache_name);
85 return (WALK_NEXT);
88 /*ARGSUSED*/
89 int
90 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
92 mdb_debug_level ^= 1;
94 mdb_printf("kmem: debugging is now %s\n",
95 mdb_debug_level ? "on" : "off");
97 return (DCMD_OK);
101 kmem_cache_walk_init(mdb_walk_state_t *wsp)
103 GElf_Sym sym;
105 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
106 mdb_warn("couldn't find kmem_caches");
107 return (WALK_ERR);
110 wsp->walk_addr = (uintptr_t)sym.st_value;
112 return (list_walk_init_named(wsp, "cache list", "cache"));
116 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
118 if (wsp->walk_addr == NULL) {
119 mdb_warn("kmem_cpu_cache doesn't support global walks");
120 return (WALK_ERR);
123 if (mdb_layered_walk("cpu", wsp) == -1) {
124 mdb_warn("couldn't walk 'cpu'");
125 return (WALK_ERR);
128 wsp->walk_data = (void *)wsp->walk_addr;
130 return (WALK_NEXT);
134 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
136 uintptr_t caddr = (uintptr_t)wsp->walk_data;
137 const cpu_t *cpu = wsp->walk_layer;
138 kmem_cpu_cache_t cc;
140 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
142 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
143 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
144 return (WALK_ERR);
147 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
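/*
 * Illustrative usage of the per-CPU cache walk above (the cache address is
 * hypothetical):
 *
 *	> ffffff01d4f1a008::walk kmem_cpu_cache
 *
 * yields one kmem_cpu_cache_t per configured CPU for the given cache,
 * layered on top of the "cpu" walker.
 */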
150 static int
151 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
153 kmem_slab_t *sp = p;
154 uintptr_t caddr = (uintptr_t)arg;
155 if ((uintptr_t)sp->slab_cache != caddr) {
156 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
157 saddr, caddr, sp->slab_cache);
158 return (-1);
161 return (0);
164 static int
165 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
167 kmem_slab_t *sp = p;
169 int rc = kmem_slab_check(p, saddr, arg);
170 if (rc != 0) {
171 return (rc);
174 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
175 mdb_warn("slab %p is not a partial slab\n", saddr);
176 return (-1);
179 return (0);
182 static int
183 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
185 kmem_slab_t *sp = p;
187 int rc = kmem_slab_check(p, saddr, arg);
188 if (rc != 0) {
189 return (rc);
192 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
193 mdb_warn("slab %p is not completely allocated\n", saddr);
194 return (-1);
197 return (0);
200 typedef struct {
201 uintptr_t kns_cache_addr;
202 int kns_nslabs;
203 } kmem_nth_slab_t;
205 static int
206 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
208 kmem_nth_slab_t *chkp = arg;
210 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
211 if (rc != 0) {
212 return (rc);
215 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
218 static int
219 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
221 uintptr_t caddr = wsp->walk_addr;
223 wsp->walk_addr = (uintptr_t)(caddr +
224 offsetof(kmem_cache_t, cache_complete_slabs));
226 return (list_walk_init_checked(wsp, "slab list", "slab",
227 kmem_complete_slab_check, (void *)caddr));
230 static int
231 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
233 uintptr_t caddr = wsp->walk_addr;
235 wsp->walk_addr = (uintptr_t)(caddr +
236 offsetof(kmem_cache_t, cache_partial_slabs));
238 return (avl_walk_init_checked(wsp, "slab list", "slab",
239 kmem_partial_slab_check, (void *)caddr));
243 kmem_slab_walk_init(mdb_walk_state_t *wsp)
245 uintptr_t caddr = wsp->walk_addr;
247 if (caddr == NULL) {
248 mdb_warn("kmem_slab doesn't support global walks\n");
249 return (WALK_ERR);
252 combined_walk_init(wsp);
253 combined_walk_add(wsp,
254 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
255 combined_walk_add(wsp,
256 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
258 return (WALK_NEXT);
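/*
 * Usage sketch (hypothetical cache address): the combined walk set up above
 * emits every kmem_slab_t in the cache, complete slabs first, then the
 * partial slabs from the AVL tree:
 *
 *	> ffffff01d4f1a008::walk kmem_slab
 */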
261 static int
262 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
264 uintptr_t caddr = wsp->walk_addr;
265 kmem_nth_slab_t *chk;
267 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
268 UM_SLEEP | UM_GC);
269 chk->kns_cache_addr = caddr;
270 chk->kns_nslabs = 1;
271 wsp->walk_addr = (uintptr_t)(caddr +
272 offsetof(kmem_cache_t, cache_complete_slabs));
274 return (list_walk_init_checked(wsp, "slab list", "slab",
275 kmem_nth_slab_check, chk));
279 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
281 uintptr_t caddr = wsp->walk_addr;
282 kmem_cache_t c;
284 if (caddr == NULL) {
285 mdb_warn("kmem_slab_partial doesn't support global walks\n");
286 return (WALK_ERR);
289 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
290 mdb_warn("couldn't read kmem_cache at %p", caddr);
291 return (WALK_ERR);
294 combined_walk_init(wsp);
297	 * Some consumers (kmem_walk_step(), in particular) require at
298 * least one callback if there are any buffers in the cache. So
299 * if there are *no* partial slabs, report the first full slab, if
300 * any.
302 * Yes, this is ugly, but it's cleaner than the other possibilities.
304 if (c.cache_partial_slabs.avl_numnodes == 0) {
305 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
306 list_walk_step, list_walk_fini);
307 } else {
308 combined_walk_add(wsp, kmem_partial_slab_walk_init,
309 avl_walk_step, avl_walk_fini);
312 return (WALK_NEXT);
316 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
318 kmem_cache_t c;
319 const char *filter = NULL;
321 if (mdb_getopts(ac, argv,
322 'n', MDB_OPT_STR, &filter,
323 NULL) != ac) {
324 return (DCMD_USAGE);
327 if (!(flags & DCMD_ADDRSPEC)) {
328 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
329 mdb_warn("can't walk kmem_cache");
330 return (DCMD_ERR);
332 return (DCMD_OK);
335 if (DCMD_HDRSPEC(flags))
336 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
337 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
339 if (mdb_vread(&c, sizeof (c), addr) == -1) {
340 mdb_warn("couldn't read kmem_cache at %p", addr);
341 return (DCMD_ERR);
344 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
345 return (DCMD_OK);
347 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
348 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
350 return (DCMD_OK);
353 void
354 kmem_cache_help(void)
356 mdb_printf("%s", "Print kernel memory caches.\n\n");
357 mdb_dec_indent(2);
358 mdb_printf("%<b>OPTIONS%</b>\n");
359 mdb_inc_indent(2);
360 mdb_printf("%s",
361 " -n name\n"
362 " name of kmem cache (or matching partial name)\n"
363 "\n"
364 "Column\tDescription\n"
365 "\n"
366 "ADDR\t\taddress of kmem cache\n"
367 "NAME\t\tname of kmem cache\n"
368 "FLAG\t\tvarious cache state flags\n"
369 "CFLAG\t\tcache creation flags\n"
370 "BUFSIZE\tobject size in bytes\n"
371 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
374 #define LABEL_WIDTH 11
375 static void
376 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
377 size_t maxbuckets, size_t minbucketsize)
379 uint64_t total;
380 int buckets;
381 int i;
382 const int *distarray;
383 int complete[2];
385 buckets = buffers_per_slab;
387 total = 0;
388 for (i = 0; i <= buffers_per_slab; i++)
389 total += ks_bucket[i];
391 if (maxbuckets > 1)
392 buckets = MIN(buckets, maxbuckets);
394 if (minbucketsize > 1) {
396 * minbucketsize does not apply to the first bucket reserved
397 * for completely allocated slabs
399 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
400 minbucketsize));
401 if ((buckets < 2) && (buffers_per_slab > 1)) {
402 buckets = 2;
403 minbucketsize = (buffers_per_slab - 1);
408 * The first printed bucket is reserved for completely allocated slabs.
409 * Passing (buckets - 1) excludes that bucket from the generated
410 * distribution, since we're handling it as a special case.
412 complete[0] = buffers_per_slab;
413 complete[1] = buffers_per_slab + 1;
414 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
416 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
417 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
419 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
421 * Print bucket ranges in descending order after the first bucket for
422 * completely allocated slabs, so a person can see immediately whether
423 * or not there is fragmentation without having to scan possibly
424 * multiple screens of output. Starting at (buckets - 2) excludes the
425 * extra terminating bucket.
427 for (i = buckets - 2; i >= 0; i--) {
428 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
430 mdb_printf("\n");
432 #undef LABEL_WIDTH
434 /*ARGSUSED*/
435 static int
436 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
438 *is_slab = B_TRUE;
439 return (WALK_DONE);
442 /*ARGSUSED*/
443 static int
444 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
445 boolean_t *is_slab)
448	 * The "kmem_slab_partial" walker reports the first full slab if there
449 * are no partial slabs (for the sake of consumers that require at least
450 * one callback if there are any buffers in the cache).
452 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
453 return (WALK_DONE);
456 typedef struct kmem_slab_usage {
457 int ksu_refcnt; /* count of allocated buffers on slab */
458 boolean_t ksu_nomove; /* slab marked non-reclaimable */
459 } kmem_slab_usage_t;
461 typedef struct kmem_slab_stats {
462 const kmem_cache_t *ks_cp;
463 int ks_slabs; /* slabs in cache */
464 int ks_partial_slabs; /* partially allocated slabs in cache */
465 uint64_t ks_unused_buffers; /* total unused buffers in cache */
466 int ks_max_buffers_per_slab; /* max buffers per slab */
467 int ks_usage_len; /* ks_usage array length */
468 kmem_slab_usage_t *ks_usage; /* partial slab usage */
469 uint_t *ks_bucket; /* slab usage distribution */
470 } kmem_slab_stats_t;
472 /*ARGSUSED*/
473 static int
474 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
475 kmem_slab_stats_t *ks)
477 kmem_slab_usage_t *ksu;
478 long unused;
480 ks->ks_slabs++;
481 ks->ks_bucket[sp->slab_refcnt]++;
483 unused = (sp->slab_chunks - sp->slab_refcnt);
484 if (unused == 0) {
485 return (WALK_NEXT);
488 ks->ks_partial_slabs++;
489 ks->ks_unused_buffers += unused;
491 if (ks->ks_partial_slabs > ks->ks_usage_len) {
492 kmem_slab_usage_t *usage;
493 int len = ks->ks_usage_len;
495 len = (len == 0 ? 16 : len * 2);
496 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
497 if (ks->ks_usage != NULL) {
498 bcopy(ks->ks_usage, usage,
499 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
500 mdb_free(ks->ks_usage,
501 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
503 ks->ks_usage = usage;
504 ks->ks_usage_len = len;
507 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
508 ksu->ksu_refcnt = sp->slab_refcnt;
509 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
510 return (WALK_NEXT);
513 static void
514 kmem_slabs_header()
516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
517 "", "", "Partial", "", "Unused", "");
518 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
519 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
520 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
521 "-------------------------", "--------", "--------", "---------",
522 "---------", "------");
526 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 kmem_cache_t c;
529 kmem_slab_stats_t stats;
530 mdb_walk_cb_t cb;
531 int pct;
532 int tenths_pct;
533 size_t maxbuckets = 1;
534 size_t minbucketsize = 0;
535 const char *filter = NULL;
536 const char *name = NULL;
537 uint_t opt_v = FALSE;
538 boolean_t buckets = B_FALSE;
539 boolean_t skip = B_FALSE;
541 if (mdb_getopts(argc, argv,
542 'B', MDB_OPT_UINTPTR, &minbucketsize,
543 'b', MDB_OPT_UINTPTR, &maxbuckets,
544 'n', MDB_OPT_STR, &filter,
545 'N', MDB_OPT_STR, &name,
546 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
547 NULL) != argc) {
548 return (DCMD_USAGE);
551 if ((maxbuckets != 1) || (minbucketsize != 0)) {
552 buckets = B_TRUE;
555 if (!(flags & DCMD_ADDRSPEC)) {
556 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
557 argv) == -1) {
558 mdb_warn("can't walk kmem_cache");
559 return (DCMD_ERR);
561 return (DCMD_OK);
564 if (mdb_vread(&c, sizeof (c), addr) == -1) {
565 mdb_warn("couldn't read kmem_cache at %p", addr);
566 return (DCMD_ERR);
569 if (name == NULL) {
570 skip = ((filter != NULL) &&
571 (strstr(c.cache_name, filter) == NULL));
572 } else if (filter == NULL) {
573 skip = (strcmp(c.cache_name, name) != 0);
574 } else {
575 /* match either -n or -N */
576 skip = ((strcmp(c.cache_name, name) != 0) &&
577 (strstr(c.cache_name, filter) == NULL));
580 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
581 kmem_slabs_header();
582 } else if ((opt_v || buckets) && !skip) {
583 if (DCMD_HDRSPEC(flags)) {
584 kmem_slabs_header();
585 } else {
586 boolean_t is_slab = B_FALSE;
587 const char *walker_name;
588 if (opt_v) {
589 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
590 walker_name = "kmem_slab_partial";
591 } else {
592 cb = (mdb_walk_cb_t)kmem_first_slab;
593 walker_name = "kmem_slab";
595 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
596 if (is_slab) {
597 kmem_slabs_header();
602 if (skip) {
603 return (DCMD_OK);
606 bzero(&stats, sizeof (kmem_slab_stats_t));
607 stats.ks_cp = &c;
608 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
609 /* +1 to include a zero bucket */
610 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
611 sizeof (*stats.ks_bucket), UM_SLEEP);
612 cb = (mdb_walk_cb_t)kmem_slablist_stat;
613 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
615 if (c.cache_buftotal == 0) {
616 pct = 0;
617 tenths_pct = 0;
618 } else {
619 uint64_t n = stats.ks_unused_buffers * 10000;
620 pct = (int)(n / c.cache_buftotal);
621 tenths_pct = pct - ((pct / 100) * 100);
622 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
623 if (tenths_pct == 10) {
624 pct += 100;
625 tenths_pct = 0;
629 pct /= 100;
630 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
631 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
632 stats.ks_unused_buffers, pct, tenths_pct);
634 if (maxbuckets == 0) {
635 maxbuckets = stats.ks_max_buffers_per_slab;
638 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
639 (stats.ks_slabs > 0)) {
640 mdb_printf("\n");
641 kmem_slabs_print_dist(stats.ks_bucket,
642 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
645 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
646 sizeof (*stats.ks_bucket));
648 if (!opt_v) {
649 return (DCMD_OK);
652 if (opt_v && (stats.ks_partial_slabs > 0)) {
653 int i;
654 kmem_slab_usage_t *ksu;
656 mdb_printf(" %d complete (%d), %d partial:",
657 (stats.ks_slabs - stats.ks_partial_slabs),
658 stats.ks_max_buffers_per_slab,
659 stats.ks_partial_slabs);
661 for (i = 0; i < stats.ks_partial_slabs; i++) {
662 ksu = &stats.ks_usage[i];
663 mdb_printf(" %d%s", ksu->ksu_refcnt,
664 (ksu->ksu_nomove ? "*" : ""));
666 mdb_printf("\n\n");
669 if (stats.ks_usage_len > 0) {
670 mdb_free(stats.ks_usage,
671 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
674 return (DCMD_OK);
677 void
678 kmem_slabs_help(void)
680 mdb_printf("%s",
681 "Display slab usage per kmem cache.\n\n");
682 mdb_dec_indent(2);
683 mdb_printf("%<b>OPTIONS%</b>\n");
684 mdb_inc_indent(2);
685 mdb_printf("%s",
686 " -n name\n"
687 " name of kmem cache (or matching partial name)\n"
688 " -N name\n"
689 " exact name of kmem cache\n"
690 " -b maxbins\n"
691 " Print a distribution of allocated buffers per slab using at\n"
692 " most maxbins bins. The first bin is reserved for completely\n"
693 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
694 " effect as specifying the maximum allocated buffers per slab\n"
695 " or setting minbinsize to 1 (-B 1).\n"
696 " -B minbinsize\n"
697 " Print a distribution of allocated buffers per slab, making\n"
698 " all bins (except the first, reserved for completely allocated\n"
699 " slabs) at least minbinsize buffers apart.\n"
700 " -v verbose output: List the allocated buffer count of each partial\n"
701 " slab on the free list in order from front to back to show how\n"
702 " closely the slabs are ordered by usage. For example\n"
703 "\n"
704 " 10 complete, 3 partial (8): 7 3 1\n"
705 "\n"
706 " means there are thirteen slabs with eight buffers each, including\n"
707 " three partially allocated slabs with less than all eight buffers\n"
708 " allocated.\n"
709 "\n"
710 " Buffer allocations are always from the front of the partial slab\n"
711 " list. When a buffer is freed from a completely used slab, that\n"
712 " slab is added to the front of the partial slab list. Assuming\n"
713 " that all buffers are equally likely to be freed soon, the\n"
714 " desired order of partial slabs is most-used at the front of the\n"
715 " list and least-used at the back (as in the example above).\n"
716 " However, if a slab contains an allocated buffer that will not\n"
717 " soon be freed, it would be better for that slab to be at the\n"
718 " front where all of its buffers can be allocated. Taking a slab\n"
719 " off the partial slab list (either with all buffers freed or all\n"
720 " buffers allocated) reduces cache fragmentation.\n"
721 "\n"
722 " A slab's allocated buffer count representing a partial slab (9 in\n"
723 " the example below) may be marked as follows:\n"
724 "\n"
725 " 9* An asterisk indicates that kmem has marked the slab non-\n"
726 " reclaimable because the kmem client refused to move one of the\n"
727 " slab's buffers. Since kmem does not expect to completely free the\n"
728 " slab, it moves it to the front of the list in the hope of\n"
729 " completely allocating it instead. A slab marked with an asterisk\n"
730 " stays marked for as long as it remains on the partial slab list.\n"
731 "\n"
732 "Column\t\tDescription\n"
733 "\n"
734 "Cache Name\t\tname of kmem cache\n"
735 "Slabs\t\t\ttotal slab count\n"
736 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
737 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
738 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
739 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
740 "\t\t\t for accounting structures (debug mode), slab\n"
741 "\t\t\t coloring (incremental small offsets to stagger\n"
742 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
745 static int
746 addrcmp(const void *lhs, const void *rhs)
748 uintptr_t p1 = *((uintptr_t *)lhs);
749 uintptr_t p2 = *((uintptr_t *)rhs);
751 if (p1 < p2)
752 return (-1);
753 if (p1 > p2)
754 return (1);
755 return (0);
758 static int
759 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
761 const kmem_bufctl_audit_t *bcp1 = *lhs;
762 const kmem_bufctl_audit_t *bcp2 = *rhs;
764 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
765 return (-1);
767 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
768 return (1);
770 return (0);
773 typedef struct kmem_hash_walk {
774 uintptr_t *kmhw_table;
775 size_t kmhw_nelems;
776 size_t kmhw_pos;
777 kmem_bufctl_t kmhw_cur;
778 } kmem_hash_walk_t;
781 kmem_hash_walk_init(mdb_walk_state_t *wsp)
783 kmem_hash_walk_t *kmhw;
784 uintptr_t *hash;
785 kmem_cache_t c;
786 uintptr_t haddr, addr = wsp->walk_addr;
787 size_t nelems;
788 size_t hsize;
790 if (addr == NULL) {
791 mdb_warn("kmem_hash doesn't support global walks\n");
792 return (WALK_ERR);
795 if (mdb_vread(&c, sizeof (c), addr) == -1) {
796 mdb_warn("couldn't read cache at addr %p", addr);
797 return (WALK_ERR);
800 if (!(c.cache_flags & KMF_HASH)) {
801 mdb_warn("cache %p doesn't have a hash table\n", addr);
802 return (WALK_DONE); /* nothing to do */
805 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
806 kmhw->kmhw_cur.bc_next = NULL;
807 kmhw->kmhw_pos = 0;
809 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
810 hsize = nelems * sizeof (uintptr_t);
811 haddr = (uintptr_t)c.cache_hash_table;
813 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
814 if (mdb_vread(hash, hsize, haddr) == -1) {
815 mdb_warn("failed to read hash table at %p", haddr);
816 mdb_free(hash, hsize);
817 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
818 return (WALK_ERR);
821 wsp->walk_data = kmhw;
823 return (WALK_NEXT);
827 kmem_hash_walk_step(mdb_walk_state_t *wsp)
829 kmem_hash_walk_t *kmhw = wsp->walk_data;
830 uintptr_t addr = NULL;
832 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
833 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
834 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
835 break;
838 if (addr == NULL)
839 return (WALK_DONE);
841 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
842 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
843 return (WALK_ERR);
846 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
849 void
850 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
852 kmem_hash_walk_t *kmhw = wsp->walk_data;
854 if (kmhw == NULL)
855 return;
857 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
858 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
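/*
 * Usage sketch (hypothetical cache address): for a KMF_HASH cache this walk
 * emits the address of every kmem_bufctl_t reachable from the hash table,
 * i.e. one bufctl per currently allocated buffer:
 *
 *	> ffffff01d4f1a008::walk kmem_hash
 */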
862 * Find the address of the bufctl structure for the address 'buf' in cache
863 * 'cp', which is at address caddr, and place it in *out.
865 static int
866 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
868 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
869 kmem_bufctl_t *bcp;
870 kmem_bufctl_t bc;
872 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
873 mdb_warn("unable to read hash bucket for %p in cache %p",
874 buf, caddr);
875 return (-1);
878 while (bcp != NULL) {
879 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
880 (uintptr_t)bcp) == -1) {
881 mdb_warn("unable to read bufctl at %p", bcp);
882 return (-1);
884 if (bc.bc_addr == buf) {
885 *out = (uintptr_t)bcp;
886 return (0);
888 bcp = bc.bc_next;
891 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
892 return (-1);
896 kmem_get_magsize(const kmem_cache_t *cp)
898 uintptr_t addr = (uintptr_t)cp->cache_magtype;
899 GElf_Sym mt_sym;
900 kmem_magtype_t mt;
901 int res;
904 * if cpu 0 has a non-zero magsize, it must be correct. caches
905 * with KMF_NOMAGAZINE have disabled their magazine layers, so
906 * it is okay to return 0 for them.
908 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
909 (cp->cache_flags & KMF_NOMAGAZINE))
910 return (res);
912 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
913 mdb_warn("unable to read 'kmem_magtype'");
914 } else if (addr < mt_sym.st_value ||
915 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
916 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
917 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
918 cp->cache_name, addr);
919 return (0);
921 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
922 mdb_warn("unable to read magtype at %a", addr);
923 return (0);
925 return (mt.mt_magsize);
928 /*ARGSUSED*/
929 static int
930 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
932 *est -= (sp->slab_chunks - sp->slab_refcnt);
934 return (WALK_NEXT);
938 * Returns an upper bound on the number of allocated buffers in a given
939 * cache.
941 size_t
942 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
944 int magsize;
945 size_t cache_est;
947 cache_est = cp->cache_buftotal;
949 (void) mdb_pwalk("kmem_slab_partial",
950 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
952 if ((magsize = kmem_get_magsize(cp)) != 0) {
953 size_t mag_est = cp->cache_full.ml_total * magsize;
955 if (cache_est >= mag_est) {
956 cache_est -= mag_est;
957 } else {
958 mdb_warn("cache %p's magazine layer holds more buffers "
959 "than the slab layer.\n", addr);
962 return (cache_est);
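/*
 * Worked example of the estimate above (all numbers hypothetical): with
 * cache_buftotal = 1000, partial slabs holding 40 unallocated chunks, and
 * 3 full magazines of 15 rounds on the depot's cache_full list, the result
 * is 1000 - 40 - (3 * 15) = 915.  This remains an upper bound because the
 * per-CPU loaded magazines are not subtracted.
 */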
965 #define READMAG_ROUNDS(rounds) { \
966 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
967 mdb_warn("couldn't read magazine at %p", kmp); \
968 goto fail; \
970 for (i = 0; i < rounds; i++) { \
971 maglist[magcnt++] = mp->mag_round[i]; \
972 if (magcnt == magmax) { \
973 mdb_warn("%d magazines exceeds fudge factor\n", \
974 magcnt); \
975 goto fail; \
981 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
982 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
984 kmem_magazine_t *kmp, *mp;
985 void **maglist = NULL;
986 int i, cpu;
987 size_t magsize, magmax, magbsize;
988 size_t magcnt = 0;
991 * Read the magtype out of the cache, after verifying the pointer's
992 * correctness.
994 magsize = kmem_get_magsize(cp);
995 if (magsize == 0) {
996 *maglistp = NULL;
997 *magcntp = 0;
998 *magmaxp = 0;
999 return (WALK_NEXT);
1003 * There are several places where we need to go buffer hunting:
1004 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1005 * and the full magazine list in the depot.
1007 * For an upper bound on the number of buffers in the magazine
1008 * layer, we have the number of magazines on the cache_full
1009 * list plus at most two magazines per CPU (the loaded and the
1010 * spare). Toss in 100 magazines as a fudge factor in case this
1011 * is live (the number "100" comes from the same fudge factor in
1012 * crash(1M)).
1014 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1015 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1017 if (magbsize >= PAGESIZE / 2) {
1018 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1019 addr, magbsize);
1020 return (WALK_ERR);
1023 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1024 mp = mdb_alloc(magbsize, alloc_flags);
1025 if (mp == NULL || maglist == NULL)
1026 goto fail;
1029 * First up: the magazines in the depot (i.e. on the cache_full list).
1031 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1032 READMAG_ROUNDS(magsize);
1033 kmp = mp->mag_next;
1035 if (kmp == cp->cache_full.ml_list)
1036 break; /* cache_full list loop detected */
1039 dprintf(("cache_full list done\n"));
1042 * Now whip through the CPUs, snagging the loaded magazines
1043 * and full spares.
1045 * In order to prevent inconsistent dumps, rounds and prounds
1046 * are copied aside before dumping begins.
1048 for (cpu = 0; cpu < ncpus; cpu++) {
1049 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1050 short rounds, prounds;
1052 if (KMEM_DUMPCC(ccp)) {
1053 rounds = ccp->cc_dump_rounds;
1054 prounds = ccp->cc_dump_prounds;
1055 } else {
1056 rounds = ccp->cc_rounds;
1057 prounds = ccp->cc_prounds;
1060 dprintf(("reading cpu cache %p\n",
1061 (uintptr_t)ccp - (uintptr_t)cp + addr));
1063 if (rounds > 0 &&
1064 (kmp = ccp->cc_loaded) != NULL) {
1065 dprintf(("reading %d loaded rounds\n", rounds));
1066 READMAG_ROUNDS(rounds);
1069 if (prounds > 0 &&
1070 (kmp = ccp->cc_ploaded) != NULL) {
1071 dprintf(("reading %d previously loaded rounds\n",
1072 prounds));
1073 READMAG_ROUNDS(prounds);
1077 dprintf(("magazine layer: %d buffers\n", magcnt));
1079 if (!(alloc_flags & UM_GC))
1080 mdb_free(mp, magbsize);
1082 *maglistp = maglist;
1083 *magcntp = magcnt;
1084 *magmaxp = magmax;
1086 return (WALK_NEXT);
1088 fail:
1089 if (!(alloc_flags & UM_GC)) {
1090 if (mp)
1091 mdb_free(mp, magbsize);
1092 if (maglist)
1093 mdb_free(maglist, magmax * sizeof (void *));
1095 return (WALK_ERR);
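/*
 * Worked example of the magmax fudge factor above (hypothetical numbers):
 * with 10 full magazines in the depot, 8 CPUs, and a magazine size of 15
 * rounds, magmax = (10 + 2 * 8 + 100) * 15 = 1890 buffer slots are set
 * aside for the magazine layer's contents.
 */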
1098 static int
1099 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1101 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1104 static int
1105 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1107 kmem_bufctl_audit_t b;
1110 * if KMF_AUDIT is not set, we know that we're looking at a
1111 * kmem_bufctl_t.
1113 if (!(cp->cache_flags & KMF_AUDIT) ||
1114 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1115 (void) memset(&b, 0, sizeof (b));
1116 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1117 mdb_warn("unable to read bufctl at %p", buf);
1118 return (WALK_ERR);
1122 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1125 typedef struct kmem_walk {
1126 int kmw_type;
1128 uintptr_t kmw_addr; /* cache address */
1129 kmem_cache_t *kmw_cp;
1130 size_t kmw_csize;
1133 * magazine layer
1135 void **kmw_maglist;
1136 size_t kmw_max;
1137 size_t kmw_count;
1138 size_t kmw_pos;
1141 * slab layer
1143 char *kmw_valid; /* to keep track of freed buffers */
1144 char *kmw_ubase; /* buffer for slab data */
1145 } kmem_walk_t;
1147 static int
1148 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1150 kmem_walk_t *kmw;
1151 int ncpus, csize;
1152 kmem_cache_t *cp;
1153 size_t vm_quantum;
1155 size_t magmax, magcnt;
1156 void **maglist = NULL;
1157 uint_t chunksize, slabsize;
1158 int status = WALK_ERR;
1159 uintptr_t addr = wsp->walk_addr;
1160 const char *layered;
1162 type &= ~KM_HASH;
1164 if (addr == NULL) {
1165 mdb_warn("kmem walk doesn't support global walks\n");
1166 return (WALK_ERR);
1169 dprintf(("walking %p\n", addr));
1172 * First we need to figure out how many CPUs are configured in the
1173 * system to know how much to slurp out.
1175 mdb_readvar(&ncpus, "max_ncpus");
1177 csize = KMEM_CACHE_SIZE(ncpus);
1178 cp = mdb_alloc(csize, UM_SLEEP);
1180 if (mdb_vread(cp, csize, addr) == -1) {
1181 mdb_warn("couldn't read cache at addr %p", addr);
1182 goto out2;
1186 * It's easy for someone to hand us an invalid cache address.
1187 * Unfortunately, it is hard for this walker to survive an
1188 * invalid cache cleanly. So we make sure that:
1190 * 1. the vmem arena for the cache is readable,
1191 * 2. the vmem arena's quantum is a power of 2,
1192 * 3. our slabsize is a multiple of the quantum, and
1193	 * 4. our chunksize is > 0 and no larger than our slabsize.
1195 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1196 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1197 vm_quantum == 0 ||
1198 (vm_quantum & (vm_quantum - 1)) != 0 ||
1199 cp->cache_slabsize < vm_quantum ||
1200 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1201 cp->cache_chunksize == 0 ||
1202 cp->cache_chunksize > cp->cache_slabsize) {
1203 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1204 goto out2;
1207 dprintf(("buf total is %d\n", cp->cache_buftotal));
1209 if (cp->cache_buftotal == 0) {
1210 mdb_free(cp, csize);
1211 return (WALK_DONE);
1215 * If they ask for bufctls, but it's a small-slab cache,
1216 * there is nothing to report.
1218 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1219 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1220 cp->cache_flags));
1221 mdb_free(cp, csize);
1222 return (WALK_DONE);
1226 * If they want constructed buffers, but there's no constructor or
1227 * the cache has DEADBEEF checking enabled, there is nothing to report.
1229 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1230 cp->cache_constructor == NULL ||
1231 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1232 mdb_free(cp, csize);
1233 return (WALK_DONE);
1237 * Read in the contents of the magazine layer
1239 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1240 &magmax, UM_SLEEP) == WALK_ERR)
1241 goto out2;
1244 * We have all of the buffers from the magazines; if we are walking
1245 * allocated buffers, sort them so we can bsearch them later.
1247 if (type & KM_ALLOCATED)
1248 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1250 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1252 kmw->kmw_type = type;
1253 kmw->kmw_addr = addr;
1254 kmw->kmw_cp = cp;
1255 kmw->kmw_csize = csize;
1256 kmw->kmw_maglist = maglist;
1257 kmw->kmw_max = magmax;
1258 kmw->kmw_count = magcnt;
1259 kmw->kmw_pos = 0;
1262 * When walking allocated buffers in a KMF_HASH cache, we walk the
1263 * hash table instead of the slab layer.
1265 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1266 layered = "kmem_hash";
1268 kmw->kmw_type |= KM_HASH;
1269 } else {
1271 * If we are walking freed buffers, we only need the
1272 * magazine layer plus the partially allocated slabs.
1273 * To walk allocated buffers, we need all of the slabs.
1275 if (type & KM_ALLOCATED)
1276 layered = "kmem_slab";
1277 else
1278 layered = "kmem_slab_partial";
1281 * for small-slab caches, we read in the entire slab. For
1282 * freed buffers, we can just walk the freelist. For
1283 * allocated buffers, we use a 'valid' array to track
1284 * the freed buffers.
1286 if (!(cp->cache_flags & KMF_HASH)) {
1287 chunksize = cp->cache_chunksize;
1288 slabsize = cp->cache_slabsize;
1290 kmw->kmw_ubase = mdb_alloc(slabsize +
1291 sizeof (kmem_bufctl_t), UM_SLEEP);
1293 if (type & KM_ALLOCATED)
1294 kmw->kmw_valid =
1295 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1299 status = WALK_NEXT;
1301 if (mdb_layered_walk(layered, wsp) == -1) {
1302 mdb_warn("unable to start layered '%s' walk", layered);
1303 status = WALK_ERR;
1306 out1:
1307 if (status == WALK_ERR) {
1308 if (kmw->kmw_valid)
1309 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1311 if (kmw->kmw_ubase)
1312 mdb_free(kmw->kmw_ubase, slabsize +
1313 sizeof (kmem_bufctl_t));
1315 if (kmw->kmw_maglist)
1316 mdb_free(kmw->kmw_maglist,
1317 kmw->kmw_max * sizeof (uintptr_t));
1319 mdb_free(kmw, sizeof (kmem_walk_t));
1320 wsp->walk_data = NULL;
1323 out2:
1324 if (status == WALK_ERR)
1325 mdb_free(cp, csize);
1327 return (status);
1331 kmem_walk_step(mdb_walk_state_t *wsp)
1333 kmem_walk_t *kmw = wsp->walk_data;
1334 int type = kmw->kmw_type;
1335 kmem_cache_t *cp = kmw->kmw_cp;
1337 void **maglist = kmw->kmw_maglist;
1338 int magcnt = kmw->kmw_count;
1340 uintptr_t chunksize, slabsize;
1341 uintptr_t addr;
1342 const kmem_slab_t *sp;
1343 const kmem_bufctl_t *bcp;
1344 kmem_bufctl_t bc;
1346 int chunks;
1347 char *kbase;
1348 void *buf;
1349 int i, ret;
1351 char *valid, *ubase;
1354 * first, handle the 'kmem_hash' layered walk case
1356 if (type & KM_HASH) {
1358 * We have a buffer which has been allocated out of the
1359 * global layer. We need to make sure that it's not
1360 * actually sitting in a magazine before we report it as
1361 * an allocated buffer.
1363 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1365 if (magcnt > 0 &&
1366 bsearch(&buf, maglist, magcnt, sizeof (void *),
1367 addrcmp) != NULL)
1368 return (WALK_NEXT);
1370 if (type & KM_BUFCTL)
1371 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1373 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1376 ret = WALK_NEXT;
1378 addr = kmw->kmw_addr;
1381 * If we're walking freed buffers, report everything in the
1382 * magazine layer before processing the first slab.
1384 if ((type & KM_FREE) && magcnt != 0) {
1385 kmw->kmw_count = 0; /* only do this once */
1386 for (i = 0; i < magcnt; i++) {
1387 buf = maglist[i];
1389 if (type & KM_BUFCTL) {
1390 uintptr_t out;
1392 if (cp->cache_flags & KMF_BUFTAG) {
1393 kmem_buftag_t *btp;
1394 kmem_buftag_t tag;
1396 /* LINTED - alignment */
1397 btp = KMEM_BUFTAG(cp, buf);
1398 if (mdb_vread(&tag, sizeof (tag),
1399 (uintptr_t)btp) == -1) {
1400 mdb_warn("reading buftag for "
1401 "%p at %p", buf, btp);
1402 continue;
1404 out = (uintptr_t)tag.bt_bufctl;
1405 } else {
1406 if (kmem_hash_lookup(cp, addr, buf,
1407 &out) == -1)
1408 continue;
1410 ret = bufctl_walk_callback(cp, wsp, out);
1411 } else {
1412 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1415 if (ret != WALK_NEXT)
1416 return (ret);
1421 * If they want constructed buffers, we're finished, since the
1422 * magazine layer holds them all.
1424 if (type & KM_CONSTRUCTED)
1425 return (WALK_DONE);
1428 * Handle the buffers in the current slab
1430 chunksize = cp->cache_chunksize;
1431 slabsize = cp->cache_slabsize;
1433 sp = wsp->walk_layer;
1434 chunks = sp->slab_chunks;
1435 kbase = sp->slab_base;
1437 dprintf(("kbase is %p\n", kbase));
1439 if (!(cp->cache_flags & KMF_HASH)) {
1440 valid = kmw->kmw_valid;
1441 ubase = kmw->kmw_ubase;
1443 if (mdb_vread(ubase, chunks * chunksize,
1444 (uintptr_t)kbase) == -1) {
1445 mdb_warn("failed to read slab contents at %p", kbase);
1446 return (WALK_ERR);
1450 * Set up the valid map as fully allocated -- we'll punch
1451 * out the freelist.
1453 if (type & KM_ALLOCATED)
1454 (void) memset(valid, 1, chunks);
1455 } else {
1456 valid = NULL;
1457 ubase = NULL;
1461 * walk the slab's freelist
1463 bcp = sp->slab_head;
1465 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1468 * since we could be in the middle of allocating a buffer,
1469	 * our refcnt could be one higher than it ought to be.  So we
1470 * check one further on the freelist than the count allows.
1472 for (i = sp->slab_refcnt; i <= chunks; i++) {
1473 uint_t ndx;
1475 dprintf(("bcp is %p\n", bcp));
1477 if (bcp == NULL) {
1478 if (i == chunks)
1479 break;
1480 mdb_warn(
1481 "slab %p in cache %p freelist too short by %d\n",
1482 sp, addr, chunks - i);
1483 break;
1486 if (cp->cache_flags & KMF_HASH) {
1487 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1488 mdb_warn("failed to read bufctl ptr at %p",
1489 bcp);
1490 break;
1492 buf = bc.bc_addr;
1493 } else {
1495 * Otherwise the buffer is (or should be) in the slab
1496 * that we've read in; determine its offset in the
1497 * slab, validate that it's not corrupt, and add to
1498	 * our base address to find the kmem_bufctl_t.  (Note
1499 * that we don't need to add the size of the bufctl
1500 * to our offset calculation because of the slop that's
1501 * allocated for the buffer at ubase.)
1503 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1505 if (offs > chunks * chunksize) {
1506 mdb_warn("found corrupt bufctl ptr %p"
1507 " in slab %p in cache %p\n", bcp,
1508 wsp->walk_addr, addr);
1509 break;
1512 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1513 buf = KMEM_BUF(cp, bcp);
1516 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518 if (ndx > slabsize / cp->cache_bufsize) {
1520 * This is very wrong; we have managed to find
1521 * a buffer in the slab which shouldn't
1522 * actually be here. Emit a warning, and
1523 * try to continue.
1525 mdb_warn("buf %p is out of range for "
1526 "slab %p, cache %p\n", buf, sp, addr);
1527 } else if (type & KM_ALLOCATED) {
1529 * we have found a buffer on the slab's freelist;
1530 * clear its entry
1532 valid[ndx] = 0;
1533 } else {
1535 * Report this freed buffer
1537 if (type & KM_BUFCTL) {
1538 ret = bufctl_walk_callback(cp, wsp,
1539 (uintptr_t)bcp);
1540 } else {
1541 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543 if (ret != WALK_NEXT)
1544 return (ret);
1547 bcp = bc.bc_next;
1550 if (bcp != NULL) {
1551 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1552 sp, addr, bcp));
1556 * If we are walking freed buffers, the loop above handled reporting
1557 * them.
1559 if (type & KM_FREE)
1560 return (WALK_NEXT);
1562 if (type & KM_BUFCTL) {
1563 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1564 "cache %p\n", addr);
1565 return (WALK_ERR);
1569 * Report allocated buffers, skipping buffers in the magazine layer.
1570 * We only get this far for small-slab caches.
1572 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1573 buf = (char *)kbase + i * chunksize;
1575 if (!valid[i])
1576 continue; /* on slab freelist */
1578 if (magcnt > 0 &&
1579 bsearch(&buf, maglist, magcnt, sizeof (void *),
1580 addrcmp) != NULL)
1581 continue; /* in magazine layer */
1583 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585 return (ret);
1588 void
1589 kmem_walk_fini(mdb_walk_state_t *wsp)
1591 kmem_walk_t *kmw = wsp->walk_data;
1592 uintptr_t chunksize;
1593 uintptr_t slabsize;
1595 if (kmw == NULL)
1596 return;
1598 if (kmw->kmw_maglist != NULL)
1599 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601 chunksize = kmw->kmw_cp->cache_chunksize;
1602 slabsize = kmw->kmw_cp->cache_slabsize;
1604 if (kmw->kmw_valid != NULL)
1605 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1606 if (kmw->kmw_ubase != NULL)
1607 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1610 mdb_free(kmw, sizeof (kmem_walk_t));
1613 /*ARGSUSED*/
1614 static int
1615 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1618 * Buffers allocated from NOTOUCH caches can also show up as freed
1619 * memory in other caches. This can be a little confusing, so we
1620 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1621 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623 if (c->cache_cflags & KMC_NOTOUCH)
1624 return (WALK_NEXT);
1626 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1627 wsp->walk_cbdata, addr) == -1)
1628 return (WALK_DONE);
1630 return (WALK_NEXT);
1633 #define KMEM_WALK_ALL(name, wsp) { \
1634 wsp->walk_data = (name); \
1635 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1636 return (WALK_ERR); \
1637 return (WALK_DONE); \
1641 kmem_walk_init(mdb_walk_state_t *wsp)
1643 if (wsp->walk_arg != NULL)
1644 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646 if (wsp->walk_addr == NULL)
1647 KMEM_WALK_ALL("kmem", wsp);
1648 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1652 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 if (wsp->walk_addr == NULL)
1655 KMEM_WALK_ALL("bufctl", wsp);
1656 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1660 freemem_walk_init(mdb_walk_state_t *wsp)
1662 if (wsp->walk_addr == NULL)
1663 KMEM_WALK_ALL("freemem", wsp);
1664 return (kmem_walk_init_common(wsp, KM_FREE));
1668 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 if (wsp->walk_addr == NULL)
1671 KMEM_WALK_ALL("freemem_constructed", wsp);
1672 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1676 freectl_walk_init(mdb_walk_state_t *wsp)
1678 if (wsp->walk_addr == NULL)
1679 KMEM_WALK_ALL("freectl", wsp);
1680 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1684 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 if (wsp->walk_addr == NULL)
1687 KMEM_WALK_ALL("freectl_constructed", wsp);
1688 return (kmem_walk_init_common(wsp,
1689 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
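/*
 * Usage sketches for the walks initialized above (the cache address is
 * hypothetical; an unaddressed walk visits every non-KMC_NOTOUCH cache):
 *
 *	> ::walk kmem			every allocated buffer, all caches
 *	> ::walk freemem		every free buffer, all caches
 *	> ffffff01d4f1a008::walk bufctl
 *					bufctls of allocated buffers in one
 *					KMF_HASH cache
 */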
1692 typedef struct bufctl_history_walk {
1693 void *bhw_next;
1694 kmem_cache_t *bhw_cache;
1695 kmem_slab_t *bhw_slab;
1696 hrtime_t bhw_timestamp;
1697 } bufctl_history_walk_t;
1700 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 bufctl_history_walk_t *bhw;
1703 kmem_bufctl_audit_t bc;
1704 kmem_bufctl_audit_t bcn;
1706 if (wsp->walk_addr == NULL) {
1707 mdb_warn("bufctl_history walk doesn't support global walks\n");
1708 return (WALK_ERR);
1711 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1712 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1713 return (WALK_ERR);
1716 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1717 bhw->bhw_timestamp = 0;
1718 bhw->bhw_cache = bc.bc_cache;
1719 bhw->bhw_slab = bc.bc_slab;
1722 * sometimes the first log entry matches the base bufctl; in that
1723 * case, skip the base bufctl.
1725 if (bc.bc_lastlog != NULL &&
1726 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1727 bc.bc_addr == bcn.bc_addr &&
1728 bc.bc_cache == bcn.bc_cache &&
1729 bc.bc_slab == bcn.bc_slab &&
1730 bc.bc_timestamp == bcn.bc_timestamp &&
1731 bc.bc_thread == bcn.bc_thread)
1732 bhw->bhw_next = bc.bc_lastlog;
1733 else
1734 bhw->bhw_next = (void *)wsp->walk_addr;
1736 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1737 wsp->walk_data = bhw;
1739 return (WALK_NEXT);
1743 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 bufctl_history_walk_t *bhw = wsp->walk_data;
1746 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1747 uintptr_t baseaddr = wsp->walk_addr;
1748 kmem_bufctl_audit_t bc;
1750 if (addr == NULL)
1751 return (WALK_DONE);
1753 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1754 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1755 return (WALK_ERR);
1759 * The bufctl is only valid if the address, cache, and slab are
1760 * correct. We also check that the timestamp is decreasing, to
1761 * prevent infinite loops.
1763 if ((uintptr_t)bc.bc_addr != baseaddr ||
1764 bc.bc_cache != bhw->bhw_cache ||
1765 bc.bc_slab != bhw->bhw_slab ||
1766 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1767 return (WALK_DONE);
1769 bhw->bhw_next = bc.bc_lastlog;
1770 bhw->bhw_timestamp = bc.bc_timestamp;
1772 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1775 void
1776 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 bufctl_history_walk_t *bhw = wsp->walk_data;
1780 mdb_free(bhw, sizeof (*bhw));
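/*
 * Usage sketch (hypothetical bufctl address): given a bufctl from a
 * KMF_AUDIT cache, this walk follows bc_lastlog back through the
 * transaction log, yielding progressively older records for the same
 * buffer:
 *
 *	> ffffff01d2c43e40::walk bufctl_history
 */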
1783 typedef struct kmem_log_walk {
1784 kmem_bufctl_audit_t *klw_base;
1785 kmem_bufctl_audit_t **klw_sorted;
1786 kmem_log_header_t klw_lh;
1787 size_t klw_size;
1788 size_t klw_maxndx;
1789 size_t klw_ndx;
1790 } kmem_log_walk_t;
1793 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 uintptr_t lp = wsp->walk_addr;
1796 kmem_log_walk_t *klw;
1797 kmem_log_header_t *lhp;
1798 int maxndx, i, j, k;
1801 * By default (global walk), walk the kmem_transaction_log. Otherwise
1802 * read the log whose kmem_log_header_t is stored at walk_addr.
1804 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1805 mdb_warn("failed to read 'kmem_transaction_log'");
1806 return (WALK_ERR);
1809 if (lp == NULL) {
1810 mdb_warn("log is disabled\n");
1811 return (WALK_ERR);
1814 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1815 lhp = &klw->klw_lh;
1817 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1818 mdb_warn("failed to read log header at %p", lp);
1819 mdb_free(klw, sizeof (kmem_log_walk_t));
1820 return (WALK_ERR);
1823 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1824 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1825 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827 if (mdb_vread(klw->klw_base, klw->klw_size,
1828 (uintptr_t)lhp->lh_base) == -1) {
1829 mdb_warn("failed to read log at base %p", lhp->lh_base);
1830 mdb_free(klw->klw_base, klw->klw_size);
1831 mdb_free(klw, sizeof (kmem_log_walk_t));
1832 return (WALK_ERR);
1835 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1836 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1839 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1840 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842 for (j = 0; j < maxndx; j++)
1843 klw->klw_sorted[k++] = &chunk[j];
1846 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1847 (int(*)(const void *, const void *))bufctlcmp);
1849 klw->klw_maxndx = k;
1850 wsp->walk_data = klw;
1852 return (WALK_NEXT);
1856 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 kmem_log_walk_t *klw = wsp->walk_data;
1859 kmem_bufctl_audit_t *bcp;
1861 if (klw->klw_ndx == klw->klw_maxndx)
1862 return (WALK_DONE);
1864 bcp = klw->klw_sorted[klw->klw_ndx++];
1866 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1867 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1870 void
1871 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 kmem_log_walk_t *klw = wsp->walk_data;
1875 mdb_free(klw->klw_base, klw->klw_size);
1876 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1877 sizeof (kmem_bufctl_audit_t *));
1878 mdb_free(klw, sizeof (kmem_log_walk_t));
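/*
 * Usage sketch (assuming this walk is registered under the name
 * "kmem_log"): a global walk reads kmem_transaction_log and yields every
 * audit record in the log, most recent first:
 *
 *	> ::walk kmem_log
 */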
1881 typedef struct allocdby_bufctl {
1882 uintptr_t abb_addr;
1883 hrtime_t abb_ts;
1884 } allocdby_bufctl_t;
1886 typedef struct allocdby_walk {
1887 const char *abw_walk;
1888 uintptr_t abw_thread;
1889 size_t abw_nbufs;
1890 size_t abw_size;
1891 allocdby_bufctl_t *abw_buf;
1892 size_t abw_ndx;
1893 } allocdby_walk_t;
1896 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1897 allocdby_walk_t *abw)
1899 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1900 return (WALK_NEXT);
1902 if (abw->abw_nbufs == abw->abw_size) {
1903 allocdby_bufctl_t *buf;
1904 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908 bcopy(abw->abw_buf, buf, oldsize);
1909 mdb_free(abw->abw_buf, oldsize);
1911 abw->abw_size <<= 1;
1912 abw->abw_buf = buf;
1915 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1916 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1917 abw->abw_nbufs++;
1919 return (WALK_NEXT);
1922 /*ARGSUSED*/
1924 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1927 abw, addr) == -1) {
1928 mdb_warn("couldn't walk bufctl for cache %p", addr);
1929 return (WALK_DONE);
1932 return (WALK_NEXT);
1935 static int
1936 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 if (lhs->abb_ts < rhs->abb_ts)
1939 return (1);
1940 if (lhs->abb_ts > rhs->abb_ts)
1941 return (-1);
1942 return (0);
1945 static int
1946 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 allocdby_walk_t *abw;
1950 if (wsp->walk_addr == NULL) {
1951 mdb_warn("allocdby walk doesn't support global walks\n");
1952 return (WALK_ERR);
1955 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957 abw->abw_thread = wsp->walk_addr;
1958 abw->abw_walk = walk;
1959 abw->abw_size = 128; /* something reasonable */
1960 abw->abw_buf =
1961 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963 wsp->walk_data = abw;
1965 if (mdb_walk("kmem_cache",
1966 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1967 mdb_warn("couldn't walk kmem_cache");
1968 allocdby_walk_fini(wsp);
1969 return (WALK_ERR);
1972 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1973 (int(*)(const void *, const void *))allocdby_cmp);
1975 return (WALK_NEXT);
1979 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 return (allocdby_walk_init_common(wsp, "bufctl"));
1985 freedby_walk_init(mdb_walk_state_t *wsp)
1987 return (allocdby_walk_init_common(wsp, "freectl"));
1991 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 allocdby_walk_t *abw = wsp->walk_data;
1994 kmem_bufctl_audit_t bc;
1995 uintptr_t addr;
1997 if (abw->abw_ndx == abw->abw_nbufs)
1998 return (WALK_DONE);
2000 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2003 mdb_warn("couldn't read bufctl at %p", addr);
2004 return (WALK_DONE);
2007 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2010 void
2011 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 allocdby_walk_t *abw = wsp->walk_data;
2015 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2016 mdb_free(abw, sizeof (allocdby_walk_t));
2019 /*ARGSUSED*/
2021 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 char c[MDB_SYM_NAMLEN];
2024 GElf_Sym sym;
2025 int i;
2027 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2028 for (i = 0; i < bcp->bc_depth; i++) {
2029 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2030 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2031 continue;
2032 if (strncmp(c, "kmem_", 5) == 0)
2033 continue;
2034 mdb_printf("%s+0x%lx",
2035 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2036 break;
2038 mdb_printf("\n");
2040 return (WALK_NEXT);
2043 static int
2044 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 if (!(flags & DCMD_ADDRSPEC))
2047 return (DCMD_USAGE);
2049 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2052 mdb_warn("can't walk '%s' for %p", w, addr);
2053 return (DCMD_ERR);
2056 return (DCMD_OK);
2059 /*ARGSUSED*/
2061 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 return (allocdby_common(addr, flags, "allocdby"));
2066 /*ARGSUSED*/
2068 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 return (allocdby_common(addr, flags, "freedby"));
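/*
 * Illustrative invocation (the thread address is hypothetical):
 *
 *	> ffffff01d09b3c20::allocdby
 *
 * lists the BUFCTL, TIMESTAMP, and CALLER of each buffer most recently
 * allocated by the given thread, newest first; ::freedby is the analogous
 * report for frees.
 */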
2074 * Return a string describing the address in relation to the given thread's
2075 * stack.
2077 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079 * - If the address is above the stack pointer, return an empty string
2080 * signifying that the address is active.
2082 * - If the address is below the stack pointer, and the thread is not on proc,
2083 * return " (below sp)".
2085 * - If the address is below the stack pointer, and the thread is on proc,
2086 * return " (possibly below sp)". Depending on context, we may or may not
2087 * have an accurate t_sp.
2089 static const char *
2090 stack_active(const kthread_t *t, uintptr_t addr)
2092 uintptr_t panicstk;
2093 GElf_Sym sym;
2095 if (t->t_state == TS_FREE)
2096 return (" (inactive interrupt thread)");
2099 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2100 * no longer relates to the thread's real stack.
2102 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2103 panicstk = (uintptr_t)sym.st_value;
2105 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2106 return ("");
2109 if (addr >= t->t_sp + STACK_BIAS)
2110 return ("");
2112 if (t->t_state == TS_ONPROC)
2113 return (" (possibly below sp)");
2115 return (" (below sp)");
2119 * Additional state for the kmem and vmem ::whatis handlers
2121 typedef struct whatis_info {
2122 mdb_whatis_t *wi_w;
2123 const kmem_cache_t *wi_cache;
2124 const vmem_t *wi_vmem;
2125 vmem_t *wi_msb_arena;
2126 size_t wi_slab_size;
2127 uint_t wi_slab_found;
2128 uint_t wi_kmem_lite_count;
2129 uint_t wi_freemem;
2130 } whatis_info_t;
2132 /* call one of our dcmd functions with "-v" and the provided address */
2133 static void
2134 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 mdb_arg_t a;
2137 a.a_type = MDB_TYPE_STRING;
2138 a.a_un.a_str = "-v";
2140 mdb_printf(":\n");
2141 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2144 static void
2145 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 #define KMEM_LITE_MAX 16
2148 pc_t callers[KMEM_LITE_MAX];
2149 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151 kmem_buftag_t bt;
2152 intptr_t stat;
2153 const char *plural = "";
2154 int i;
2156 /* validate our arguments and read in the buftag */
2157 if (count == 0 || count > KMEM_LITE_MAX ||
2158 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2159 return;
2161 /* validate the buffer state and read in the callers */
2162 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164	if ((stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) ||
2165 mdb_vread(callers, count * sizeof (pc_t),
2166 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2167 return;
2169 /* If there aren't any filled in callers, bail */
2170 if (callers[0] == uninit)
2171 return;
2173 plural = (callers[1] == uninit) ? "" : "s";
2175 /* Everything's done and checked; print them out */
2176 mdb_printf(":\n");
2178 mdb_inc_indent(8);
2179 mdb_printf("recent caller%s: %a", plural, callers[0]);
2180 for (i = 1; i < count; i++) {
2181 if (callers[i] == uninit)
2182 break;
2183 mdb_printf(", %a", callers[i]);
2185 mdb_dec_indent(8);
2188 static void
2189 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2190 uintptr_t baddr)
2192 mdb_whatis_t *w = wi->wi_w;
2194 const kmem_cache_t *cp = wi->wi_cache;
2195 /* LINTED pointer cast may result in improper alignment */
2196 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2197 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2198 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2200 mdb_whatis_report_object(w, maddr, addr, "");
2202 if (baddr != 0 && !call_printer)
2203 mdb_printf("bufctl %p ", baddr);
2205 mdb_printf("%s from %s",
2206 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2208 if (baddr != 0 && call_printer) {
2209 whatis_call_printer(bufctl, baddr);
2210 return;
2213 /* for KMF_LITE caches, try to print out the previous callers */
2214 if (!quiet && (cp->cache_flags & KMF_LITE))
2215 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2217 mdb_printf("\n");
2220 /*ARGSUSED*/
2221 static int
2222 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2224 mdb_whatis_t *w = wi->wi_w;
2226 uintptr_t cur;
2227 size_t size = wi->wi_cache->cache_bufsize;
2229 while (mdb_whatis_match(w, addr, size, &cur))
2230 whatis_print_kmem(wi, cur, addr, NULL);
2232 return (WHATIS_WALKRET(w));
2235 /*ARGSUSED*/
2236 static int
2237 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2239 mdb_whatis_t *w = wi->wi_w;
2241 uintptr_t cur;
2242 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2243 size_t size = wi->wi_cache->cache_bufsize;
2245 while (mdb_whatis_match(w, addr, size, &cur))
2246 whatis_print_kmem(wi, cur, addr, baddr);
2248 return (WHATIS_WALKRET(w));
2251 static int
2252 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2254 mdb_whatis_t *w = wi->wi_w;
2256 size_t size = vs->vs_end - vs->vs_start;
2257 uintptr_t cur;
2259 /* We're not interested in anything but alloc and free segments */
2260 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2261 return (WALK_NEXT);
2263 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2264 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2267 * If we're not printing it separately, provide the vmem_seg
2268 * pointer if it has a stack trace.
2270 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2271 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2272 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2273 mdb_printf("vmem_seg %p ", addr);
2276 mdb_printf("%s from the %s vmem arena",
2277 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2278 wi->wi_vmem->vm_name);
2280 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2281 whatis_call_printer(vmem_seg, addr);
2282 else
2283 mdb_printf("\n");
2286 return (WHATIS_WALKRET(w));
2289 static int
2290 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2292 mdb_whatis_t *w = wi->wi_w;
2293 const char *nm = vmem->vm_name;
2295 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2296 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2298 if (identifier != idspace)
2299 return (WALK_NEXT);
2301 wi->wi_vmem = vmem;
2303 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2304 mdb_printf("Searching vmem arena %s...\n", nm);
2306 if (mdb_pwalk("vmem_seg",
2307 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2308 mdb_warn("can't walk vmem_seg for %p", addr);
2309 return (WALK_NEXT);
2312 return (WHATIS_WALKRET(w));
2315 /*ARGSUSED*/
2316 static int
2317 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2319 mdb_whatis_t *w = wi->wi_w;
2321 /* It must overlap with the slab data, or it's not interesting */
2322 if (mdb_whatis_overlaps(w,
2323 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2324 wi->wi_slab_found++;
2325 return (WALK_DONE);
2327 return (WALK_NEXT);
2330 static int
2331 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2333 mdb_whatis_t *w = wi->wi_w;
2335 char *walk, *freewalk;
2336 mdb_walk_cb_t func;
2337 int do_bufctl;
2339 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2340 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2342 if (identifier != idspace)
2343 return (WALK_NEXT);
2345 /* Override the '-b' flag as necessary */
2346 if (!(c->cache_flags & KMF_HASH))
2347 do_bufctl = FALSE; /* no bufctls to walk */
2348 else if (c->cache_flags & KMF_AUDIT)
2349 do_bufctl = TRUE; /* we always want debugging info */
2350 else
2351 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2353 if (do_bufctl) {
2354 walk = "bufctl";
2355 freewalk = "freectl";
2356 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2357 } else {
2358 walk = "kmem";
2359 freewalk = "freemem";
2360 func = (mdb_walk_cb_t)whatis_walk_kmem;
2363 wi->wi_cache = c;
2365 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2366 mdb_printf("Searching %s...\n", c->cache_name);
2369 * If more than two buffers live on each slab, figure out if we're
2370 * interested in anything in any slab before doing the more expensive
2371 * kmem/freemem (bufctl/freectl) walkers.
2373 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2374 if (!(c->cache_flags & KMF_HASH))
2375 wi->wi_slab_size -= sizeof (kmem_slab_t);
2377 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2378 wi->wi_slab_found = 0;
2379 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2380 addr) == -1) {
2381 mdb_warn("can't find kmem_slab walker");
2382 return (WALK_DONE);
2384 if (wi->wi_slab_found == 0)
2385 return (WALK_NEXT);
2388 wi->wi_freemem = FALSE;
2389 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2390 mdb_warn("can't find %s walker", walk);
2391 return (WALK_DONE);
2394 if (mdb_whatis_done(w))
2395 return (WALK_DONE);
2398 * We have searched for allocated memory; now search for freed memory.
2400 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2401 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2403 wi->wi_freemem = TRUE;
2404 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2405 mdb_warn("can't find %s walker", freewalk);
2406 return (WALK_DONE);
2409 return (WHATIS_WALKRET(w));
2412 static int
2413 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2415 if (c->cache_arena == wi->wi_msb_arena ||
2416 (c->cache_cflags & KMC_NOTOUCH))
2417 return (WALK_NEXT);
2419 return (whatis_walk_cache(addr, c, wi));
2422 static int
2423 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2425 if (c->cache_arena != wi->wi_msb_arena)
2426 return (WALK_NEXT);
2428 return (whatis_walk_cache(addr, c, wi));
2431 static int
2432 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2434 if (c->cache_arena == wi->wi_msb_arena ||
2435 !(c->cache_cflags & KMC_NOTOUCH))
2436 return (WALK_NEXT);
2438 return (whatis_walk_cache(addr, c, wi));
2441 static int
2442 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2444 uintptr_t cur;
2445 uintptr_t saddr;
2446 size_t size;
2449 * Often, one calls ::whatis on an address from a thread structure.
2450 * We use this opportunity to short circuit this case...
2452 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2453 mdb_whatis_report_object(w, cur, addr,
2454 "allocated as a thread structure\n");
2457 * Now check the stack
2459 if (t->t_stkbase == NULL)
2460 return (WALK_NEXT);
2463 * This assumes that t_stk is the end of the stack, but it's really
2464 * only the initial stack pointer for the thread. Arguments to the
2465 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2466 * that 't->t_stk::whatis' reports "part of t's stack", we include
2467 * t_stk in the range (the "+ 1", below), but the kernel should
2468 * really include the full stack bounds where we can find it.
2470 saddr = (uintptr_t)t->t_stkbase;
2471 size = (uintptr_t)t->t_stk - saddr + 1;
2472 while (mdb_whatis_match(w, saddr, size, &cur))
2473 mdb_whatis_report_object(w, cur, cur,
2474 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2476 return (WHATIS_WALKRET(w));
2479 static void
2480 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2481 uintptr_t base, size_t size, const char *where)
2483 uintptr_t cur;
2486 * Since we're searching for addresses inside a module, we report
2487 * them as symbols.
2489 while (mdb_whatis_match(w, base, size, &cur))
2490 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2493 static int
2494 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2496 char name[MODMAXNAMELEN];
2497 struct module mod;
2498 Shdr shdr;
2500 if (m->mod_mp == NULL)
2501 return (WALK_NEXT);
2503 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2504 mdb_warn("couldn't read modctl %p's module", addr);
2505 return (WALK_NEXT);
2508 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2509 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2511 whatis_modctl_match(w, name,
2512 (uintptr_t)mod.text, mod.text_size, "text segment");
2513 whatis_modctl_match(w, name,
2514 (uintptr_t)mod.data, mod.data_size, "data segment");
2515 whatis_modctl_match(w, name,
2516 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2518 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2519 mdb_warn("couldn't read symbol header for %p's module", addr);
2520 return (WALK_NEXT);
2523 whatis_modctl_match(w, name,
2524 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2525 whatis_modctl_match(w, name,
2526 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2528 return (WHATIS_WALKRET(w));
2531 /*ARGSUSED*/
2532 static int
2533 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2535 uintptr_t cur;
2537 uintptr_t base = (uintptr_t)seg->pages;
2538 size_t size = (uintptr_t)seg->epages - base;
2540 while (mdb_whatis_match(w, base, size, &cur)) {
2541 /* round our found pointer down to the page_t base. */
2542 size_t offset = (cur - base) % sizeof (page_t);
2544 mdb_whatis_report_object(w, cur, cur - offset,
2545 "allocated as a page structure\n");
2548 return (WHATIS_WALKRET(w));
2551 /*ARGSUSED*/
2552 static int
2553 whatis_run_modules(mdb_whatis_t *w, void *arg)
2555 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2556 mdb_warn("couldn't find modctl walker");
2557 return (1);
2559 return (0);
2562 /*ARGSUSED*/
2563 static int
2564 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2567 * Now search all thread stacks. Yes, this is a little weak; we
2568 * can save a lot of work by first checking to see if the
2569 * address is in segkp vs. segkmem. But hey, computers are
2570 * fast.
2572 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2573 mdb_warn("couldn't find thread walker");
2574 return (1);
2576 return (0);
2579 /*ARGSUSED*/
2580 static int
2581 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2583 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2584 mdb_warn("couldn't find memseg walker");
2585 return (1);
2587 return (0);
2590 /*ARGSUSED*/
2591 static int
2592 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2594 whatis_info_t wi;
2596 bzero(&wi, sizeof (wi));
2597 wi.wi_w = w;
2599 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2600 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2602 if (mdb_readvar(&wi.wi_kmem_lite_count,
2603 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2604 wi.wi_kmem_lite_count = 0;
2607 * We process kmem caches in the following order:
2609 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2610 * metadata (can be huge with KMF_AUDIT)
2611 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2613 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2614 &wi) == -1 ||
2615 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2616 &wi) == -1 ||
2617 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2618 &wi) == -1) {
2619 mdb_warn("couldn't find kmem_cache walker");
2620 return (1);
2622 return (0);
2625 /*ARGSUSED*/
2626 static int
2627 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2629 whatis_info_t wi;
2631 bzero(&wi, sizeof (wi));
2632 wi.wi_w = w;
2634 if (mdb_walk("vmem_postfix",
2635 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2636 mdb_warn("couldn't find vmem_postfix walker");
2637 return (1);
2639 return (0);
2642 typedef struct kmem_log_cpu {
2643 uintptr_t kmc_low;
2644 uintptr_t kmc_high;
2645 } kmem_log_cpu_t;
2647 typedef struct kmem_log_data {
2648 uintptr_t kmd_addr;
2649 kmem_log_cpu_t *kmd_cpu;
2650 } kmem_log_data_t;
2653 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2654 kmem_log_data_t *kmd)
2656 int i;
2657 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2658 size_t bufsize;
2660 for (i = 0; i < NCPU; i++) {
2661 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2662 break;
2665 if (kmd->kmd_addr) {
2666 if (b->bc_cache == NULL)
2667 return (WALK_NEXT);
2669 if (mdb_vread(&bufsize, sizeof (bufsize),
2670 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2671 mdb_warn(
2672 "failed to read cache_bufsize for cache at %p",
2673 b->bc_cache);
2674 return (WALK_ERR);
2677 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2678 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2679 return (WALK_NEXT);
2682 if (i == NCPU)
2683 mdb_printf(" ");
2684 else
2685 mdb_printf("%3d", i);
2687 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2688 b->bc_timestamp, b->bc_thread);
2690 return (WALK_NEXT);
2693 /*ARGSUSED*/
2695 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2697 kmem_log_header_t lh;
2698 kmem_cpu_log_header_t clh;
2699 uintptr_t lhp, clhp;
2700 int ncpus;
2701 uintptr_t *cpu;
2702 GElf_Sym sym;
2703 kmem_log_cpu_t *kmc;
2704 int i;
2705 kmem_log_data_t kmd;
2706 uint_t opt_b = FALSE;
2708 if (mdb_getopts(argc, argv,
2709 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2710 return (DCMD_USAGE);
2712 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2713 mdb_warn("failed to read 'kmem_transaction_log'");
2714 return (DCMD_ERR);
2717 if (lhp == NULL) {
2718 mdb_warn("no kmem transaction log\n");
2719 return (DCMD_ERR);
2722 mdb_readvar(&ncpus, "ncpus");
2724 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2725 mdb_warn("failed to read log header at %p", lhp);
2726 return (DCMD_ERR);
2729 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2731 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2733 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2734 mdb_warn("couldn't find 'cpu' array");
2735 return (DCMD_ERR);
2738 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2739 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2740 NCPU * sizeof (uintptr_t), sym.st_size);
2741 return (DCMD_ERR);
2744 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2745 mdb_warn("failed to read cpu array at %p", sym.st_value);
2746 return (DCMD_ERR);
2749 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2750 kmd.kmd_addr = NULL;
2751 kmd.kmd_cpu = kmc;
2753 for (i = 0; i < NCPU; i++) {
2755 if (cpu[i] == NULL)
2756 continue;
2758 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2759 mdb_warn("cannot read cpu %d's log header at %p",
2760 i, clhp);
2761 return (DCMD_ERR);
2764 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2765 (uintptr_t)lh.lh_base;
2766 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2768 clhp += sizeof (kmem_cpu_log_header_t);
2771 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2772 "TIMESTAMP", "THREAD");
2775 * If we have been passed an address, print out only log entries
2776 * corresponding to that address. If opt_b is specified, then interpret
2777 * the address as a bufctl.
2779 if (flags & DCMD_ADDRSPEC) {
2780 kmem_bufctl_audit_t b;
2782 if (opt_b) {
2783 kmd.kmd_addr = addr;
2784 } else {
2785 if (mdb_vread(&b,
2786 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2787 mdb_warn("failed to read bufctl at %p", addr);
2788 return (DCMD_ERR);
2791 (void) kmem_log_walk(addr, &b, &kmd);
2793 return (DCMD_OK);
2797 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2798 mdb_warn("can't find kmem log walker");
2799 return (DCMD_ERR);
2802 return (DCMD_OK);
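/*
 * Illustrative usage of ::kmem_log (the address below is hypothetical):
 *   > ::kmem_log                - dump the transaction log for all CPUs
 *   > 0x70d3860::kmem_log       - show only entries for the buffer that
 *                                 contains the given address
 *   > 0x70d3860::kmem_log -b    - interpret the address as a bufctl instead
 */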
2805 typedef struct bufctl_history_cb {
2806 int bhc_flags;
2807 int bhc_argc;
2808 const mdb_arg_t *bhc_argv;
2809 int bhc_ret;
2810 } bufctl_history_cb_t;
2812 /*ARGSUSED*/
2813 static int
2814 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2816 bufctl_history_cb_t *bhc = arg;
2818 bhc->bhc_ret =
2819 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2821 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2823 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2826 void
2827 bufctl_help(void)
2829 mdb_printf("%s",
2830 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2831 mdb_dec_indent(2);
2832 mdb_printf("%<b>OPTIONS%</b>\n");
2833 mdb_inc_indent(2);
2834 mdb_printf("%s",
2835 " -v Display the full content of the bufctl, including its stack trace\n"
2836 " -h retrieve the bufctl's transaction history, if available\n"
2837 " -a addr\n"
2838 " filter out bufctls not involving the buffer at addr\n"
2839 " -c caller\n"
2840 " filter out bufctls without the function/PC in their stack trace\n"
2841 " -e earliest\n"
2842 " filter out bufctls timestamped before earliest\n"
2843 " -l latest\n"
2844 " filter out bufctls timestamped after latest\n"
2845 " -t thread\n"
2846 " filter out bufctls not involving thread\n");
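/*
 * Illustrative usage of ::bufctl (the address is hypothetical):
 *   > 0x70d3860::bufctl -v      - full display, including the stack trace
 *   > 0x70d3860::bufctl -h      - walk and display the bufctl's history
 */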
2850 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2852 kmem_bufctl_audit_t bc;
2853 uint_t verbose = FALSE;
2854 uint_t history = FALSE;
2855 uint_t in_history = FALSE;
2856 uintptr_t caller = NULL, thread = NULL;
2857 uintptr_t laddr, haddr, baddr = NULL;
2858 hrtime_t earliest = 0, latest = 0;
2859 int i, depth;
2860 char c[MDB_SYM_NAMLEN];
2861 GElf_Sym sym;
2863 if (mdb_getopts(argc, argv,
2864 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2865 'h', MDB_OPT_SETBITS, TRUE, &history,
2866 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2867 'c', MDB_OPT_UINTPTR, &caller,
2868 't', MDB_OPT_UINTPTR, &thread,
2869 'e', MDB_OPT_UINT64, &earliest,
2870 'l', MDB_OPT_UINT64, &latest,
2871 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2872 return (DCMD_USAGE);
2874 if (!(flags & DCMD_ADDRSPEC))
2875 return (DCMD_USAGE);
2877 if (in_history && !history)
2878 return (DCMD_USAGE);
2880 if (history && !in_history) {
2881 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2882 UM_SLEEP | UM_GC);
2883 bufctl_history_cb_t bhc;
2885 nargv[0].a_type = MDB_TYPE_STRING;
2886 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2888 for (i = 0; i < argc; i++)
2889 nargv[i + 1] = argv[i];
2892 * When in history mode, we treat each element as if it
2893 * were in a separate loop, so that the headers group
2894 * bufctls with similar histories.
2896 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2897 bhc.bhc_argc = argc + 1;
2898 bhc.bhc_argv = nargv;
2899 bhc.bhc_ret = DCMD_OK;
2901 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2902 addr) == -1) {
2903 mdb_warn("unable to walk bufctl_history");
2904 return (DCMD_ERR);
2907 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2908 mdb_printf("\n");
2910 return (bhc.bhc_ret);
2913 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2914 if (verbose) {
2915 mdb_printf("%16s %16s %16s %16s\n"
2916 "%<u>%16s %16s %16s %16s%</u>\n",
2917 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2918 "", "CACHE", "LASTLOG", "CONTENTS");
2919 } else {
2920 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2921 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2926 mdb_warn("couldn't read bufctl at %p", addr);
2927 return (DCMD_ERR);
2931 * Guard against bogus bc_depth in case the bufctl is corrupt or
2932 * the address does not really refer to a bufctl.
2934 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2936 if (caller != NULL) {
2937 laddr = caller;
2938 haddr = caller + sizeof (caller);
2940 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2941 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2943 * We were provided an exact symbol value; any
2944 * address in the function is valid.
2946 laddr = (uintptr_t)sym.st_value;
2947 haddr = (uintptr_t)sym.st_value + sym.st_size;
2950 for (i = 0; i < depth; i++)
2951 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2952 break;
2954 if (i == depth)
2955 return (DCMD_OK);
2958 if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2959 return (DCMD_OK);
2961 if (earliest != 0 && bc.bc_timestamp < earliest)
2962 return (DCMD_OK);
2964 if (latest != 0 && bc.bc_timestamp > latest)
2965 return (DCMD_OK);
2967 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2968 return (DCMD_OK);
2970 if (flags & DCMD_PIPE_OUT) {
2971 mdb_printf("%#lr\n", addr);
2972 return (DCMD_OK);
2975 if (verbose) {
2976 mdb_printf(
2977 "%<b>%16p%</b> %16p %16llx %16p\n"
2978 "%16s %16p %16p %16p\n",
2979 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2980 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2982 mdb_inc_indent(17);
2983 for (i = 0; i < depth; i++)
2984 mdb_printf("%a\n", bc.bc_stack[i]);
2985 mdb_dec_indent(17);
2986 mdb_printf("\n");
2987 } else {
2988 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2989 bc.bc_timestamp, bc.bc_thread);
2991 for (i = 0; i < depth; i++) {
2992 if (mdb_lookup_by_addr(bc.bc_stack[i],
2993 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2994 continue;
2995 if (strncmp(c, "kmem_", 5) == 0)
2996 continue;
2997 mdb_printf(" %a\n", bc.bc_stack[i]);
2998 break;
3001 if (i >= depth)
3002 mdb_printf("\n");
3005 return (DCMD_OK);
3008 typedef struct kmem_verify {
3009 uint64_t *kmv_buf; /* buffer to read cache contents into */
3010 size_t kmv_size; /* number of bytes in kmv_buf */
3011 int kmv_corruption; /* > 0 if corruption found. */
3012 int kmv_besilent; /* if set, don't print corruption sites */
3013 struct kmem_cache kmv_cache; /* the cache we're operating on */
3014 } kmem_verify_t;
3017 * verify_pattern()
3018 * verify that buf is filled with the pattern pat.
3020 static int64_t
3021 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3023 /*LINTED*/
3024 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3025 uint64_t *buf;
3027 for (buf = buf_arg; buf < bufend; buf++)
3028 if (*buf != pat)
3029 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3030 return (-1);
3034 * verify_buftag()
3035 * verify that btp->bt_bxstat == (bcp ^ pat)
3037 static int
3038 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3040 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 * verify_free()
3045 * verify the integrity of a free block of memory by checking
3046 * that it is filled with 0xdeadbeef and that its buftag is sane.
3048 /*ARGSUSED1*/
3049 static int
3050 verify_free(uintptr_t addr, const void *data, void *private)
3052 kmem_verify_t *kmv = (kmem_verify_t *)private;
3053 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3054 int64_t corrupt; /* corruption offset */
3055 kmem_buftag_t *buftagp; /* ptr to buftag */
3056 kmem_cache_t *cp = &kmv->kmv_cache;
3057 int besilent = kmv->kmv_besilent;
3059 /*LINTED*/
3060 buftagp = KMEM_BUFTAG(cp, buf);
3063 * Read the buffer to check.
3065 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3066 if (!besilent)
3067 mdb_warn("couldn't read %p", addr);
3068 return (WALK_NEXT);
3071 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3072 KMEM_FREE_PATTERN)) >= 0) {
3073 if (!besilent)
3074 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3075 addr, (uintptr_t)addr + corrupt);
3076 goto corrupt;
3079 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3080 * the first bytes of the buffer, hence we cannot check for red
3081 * zone corruption.
3083 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3084 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3085 if (!besilent)
3086 mdb_printf("buffer %p (free) seems to "
3087 "have a corrupt redzone pattern\n", addr);
3088 goto corrupt;
3092 * confirm bufctl pointer integrity.
3094 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3095 if (!besilent)
3096 mdb_printf("buffer %p (free) has a corrupt "
3097 "buftag\n", addr);
3098 goto corrupt;
3101 return (WALK_NEXT);
3102 corrupt:
3103 kmv->kmv_corruption++;
3104 return (WALK_NEXT);
3108 * verify_alloc()
3109 * Verify that the buftag of an allocated buffer makes sense with respect
3110 * to the buffer.
3112 /*ARGSUSED1*/
3113 static int
3114 verify_alloc(uintptr_t addr, const void *data, void *private)
3116 kmem_verify_t *kmv = (kmem_verify_t *)private;
3117 kmem_cache_t *cp = &kmv->kmv_cache;
3118 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3119 /*LINTED*/
3120 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3121 uint32_t *ip = (uint32_t *)buftagp;
3122 uint8_t *bp = (uint8_t *)buf;
3123 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3124 int besilent = kmv->kmv_besilent;
3127 * Read the buffer to check.
3129 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3130 if (!besilent)
3131 mdb_warn("couldn't read %p", addr);
3132 return (WALK_NEXT);
3136 * There are two cases to handle:
3137 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3138 * 0xfeedfacefeedface at the end of it
3139 * 2. If the buf was alloc'd using kmem_alloc, it will have
3140 * 0xbb just past the end of the region in use. At the buftag,
3141 * it will have 0xfeedface (or, if the whole buffer is in use,
3142 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3143 * endianness), followed by 32 bits containing the offset of the
3144 * 0xbb byte in the buffer.
3146 * Finally, the two 32-bit words that comprise the second half of the
3147 * buftag should xor to KMEM_BUFTAG_ALLOC
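 *
 * Illustrative example (assuming the usual KMEM_SIZE_ENCODE/DECODE
 * pairing): for a kmem_alloc(10) buffer, case 2 applies, so bp[10]
 * should hold KMEM_REDZONE_BYTE (0xbb) and KMEM_SIZE_DECODE(ip[1])
 * should yield 10.
 */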
3150 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3151 looks_ok = 1;
3152 else if (!KMEM_SIZE_VALID(ip[1]))
3153 size_ok = 0;
3154 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3155 looks_ok = 1;
3156 else
3157 size_ok = 0;
3159 if (!size_ok) {
3160 if (!besilent)
3161 mdb_printf("buffer %p (allocated) has a corrupt "
3162 "redzone size encoding\n", addr);
3163 goto corrupt;
3166 if (!looks_ok) {
3167 if (!besilent)
3168 mdb_printf("buffer %p (allocated) has a corrupt "
3169 "redzone signature\n", addr);
3170 goto corrupt;
3173 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3174 if (!besilent)
3175 mdb_printf("buffer %p (allocated) has a "
3176 "corrupt buftag\n", addr);
3177 goto corrupt;
3180 return (WALK_NEXT);
3181 corrupt:
3182 kmv->kmv_corruption++;
3183 return (WALK_NEXT);
3186 /*ARGSUSED2*/
3188 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3190 if (flags & DCMD_ADDRSPEC) {
3191 int check_alloc = 0, check_free = 0;
3192 kmem_verify_t kmv;
3194 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3195 addr) == -1) {
3196 mdb_warn("couldn't read kmem_cache %p", addr);
3197 return (DCMD_ERR);
3200 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3201 sizeof (kmem_buftag_t);
3202 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3203 kmv.kmv_corruption = 0;
3205 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3206 check_alloc = 1;
3207 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3208 check_free = 1;
3209 } else {
3210 if (!(flags & DCMD_LOOP)) {
3211 mdb_warn("cache %p (%s) does not have "
3212 "redzone checking enabled\n", addr,
3213 kmv.kmv_cache.cache_name);
3215 return (DCMD_ERR);
3218 if (flags & DCMD_LOOP) {
3220 * table mode, don't print out every corrupt buffer
3222 kmv.kmv_besilent = 1;
3223 } else {
3224 mdb_printf("Summary for cache '%s'\n",
3225 kmv.kmv_cache.cache_name);
3226 mdb_inc_indent(2);
3227 kmv.kmv_besilent = 0;
3230 if (check_alloc)
3231 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3232 if (check_free)
3233 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3235 if (flags & DCMD_LOOP) {
3236 if (kmv.kmv_corruption == 0) {
3237 mdb_printf("%-*s %?p clean\n",
3238 KMEM_CACHE_NAMELEN,
3239 kmv.kmv_cache.cache_name, addr);
3240 } else {
3241 char *s = ""; /* optional s in "buffer[s]" */
3242 if (kmv.kmv_corruption > 1)
3243 s = "s";
3245 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3246 KMEM_CACHE_NAMELEN,
3247 kmv.kmv_cache.cache_name, addr,
3248 kmv.kmv_corruption, s);
3250 } else {
3252 * This is the more verbose mode, when the user has
3253 * typed addr::kmem_verify. If the cache was clean,
3254 * nothing will have yet been printed. So say something.
3256 if (kmv.kmv_corruption == 0)
3257 mdb_printf("clean\n");
3259 mdb_dec_indent(2);
3261 } else {
3263 * If the user didn't specify a cache to verify, we'll walk all
3264 * kmem_cache's, specifying ourself as a callback for each...
3265 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3267 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN,
3268 "Cache Name", "Addr", "Cache Integrity");
3269 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3272 return (DCMD_OK);
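/*
 * Illustrative usage of ::kmem_verify (the address is hypothetical):
 *   > ::kmem_verify             - summarize the integrity of every cache
 *   > 0x70028028::kmem_verify   - verify one cache, reporting each corrupt
 *                                 buffer that is found
 */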
3275 typedef struct vmem_node {
3276 struct vmem_node *vn_next;
3277 struct vmem_node *vn_parent;
3278 struct vmem_node *vn_sibling;
3279 struct vmem_node *vn_children;
3280 uintptr_t vn_addr;
3281 int vn_marked;
3282 vmem_t vn_vmem;
3283 } vmem_node_t;
3285 typedef struct vmem_walk {
3286 vmem_node_t *vw_root;
3287 vmem_node_t *vw_current;
3288 } vmem_walk_t;
3291 vmem_walk_init(mdb_walk_state_t *wsp)
3293 uintptr_t vaddr, paddr;
3294 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3295 vmem_walk_t *vw;
3297 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3298 mdb_warn("couldn't read 'vmem_list'");
3299 return (WALK_ERR);
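/*
 * Descriptive note: the first loop below snapshots every arena on
 * vmem_list into a vmem_node_t; the second loop then links each node
 * under its vm_source parent (or onto the root list) so that the walk
 * can traverse the arenas as a tree.
 */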
3302 while (vaddr != NULL) {
3303 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3304 vp->vn_addr = vaddr;
3305 vp->vn_next = head;
3306 head = vp;
3308 if (vaddr == wsp->walk_addr)
3309 current = vp;
3311 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3312 mdb_warn("couldn't read vmem_t at %p", vaddr);
3313 goto err;
3316 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3319 for (vp = head; vp != NULL; vp = vp->vn_next) {
3321 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3322 vp->vn_sibling = root;
3323 root = vp;
3324 continue;
3327 for (parent = head; parent != NULL; parent = parent->vn_next) {
3328 if (parent->vn_addr != paddr)
3329 continue;
3330 vp->vn_sibling = parent->vn_children;
3331 parent->vn_children = vp;
3332 vp->vn_parent = parent;
3333 break;
3336 if (parent == NULL) {
3337 mdb_warn("couldn't find %p's parent (%p)\n",
3338 vp->vn_addr, paddr);
3339 goto err;
3343 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3344 vw->vw_root = root;
3346 if (current != NULL)
3347 vw->vw_current = current;
3348 else
3349 vw->vw_current = root;
3351 wsp->walk_data = vw;
3352 return (WALK_NEXT);
3353 err:
3354 for (vp = head; head != NULL; vp = head) {
3355 head = vp->vn_next;
3356 mdb_free(vp, sizeof (vmem_node_t));
3359 return (WALK_ERR);
3363 vmem_walk_step(mdb_walk_state_t *wsp)
3365 vmem_walk_t *vw = wsp->walk_data;
3366 vmem_node_t *vp;
3367 int rval;
3369 if ((vp = vw->vw_current) == NULL)
3370 return (WALK_DONE);
3372 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3374 if (vp->vn_children != NULL) {
3375 vw->vw_current = vp->vn_children;
3376 return (rval);
3379 do {
3380 vw->vw_current = vp->vn_sibling;
3381 vp = vp->vn_parent;
3382 } while (vw->vw_current == NULL && vp != NULL);
3384 return (rval);
3388 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3389 * children are visited before their parent. We perform the postfix walk
3390 * iteratively (rather than recursively) to allow mdb to regain control
3391 * after each callback.
3394 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3396 vmem_walk_t *vw = wsp->walk_data;
3397 vmem_node_t *vp = vw->vw_current;
3398 int rval;
3401 * If this node is marked, then we know that we have already visited
3402 * all of its children. If the node has any siblings, they need to
3403 * be visited next; otherwise, we need to visit the parent. Note
3404 * that vp->vn_marked will only be zero on the first invocation of
3405 * the step function.
3407 if (vp->vn_marked) {
3408 if (vp->vn_sibling != NULL)
3409 vp = vp->vn_sibling;
3410 else if (vp->vn_parent != NULL)
3411 vp = vp->vn_parent;
3412 else {
3414 * We have neither a parent, nor a sibling, and we
3415 * have already been visited; we're done.
3417 return (WALK_DONE);
3422 * Before we visit this node, visit its children.
3424 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3425 vp = vp->vn_children;
3427 vp->vn_marked = 1;
3428 vw->vw_current = vp;
3429 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3431 return (rval);
3434 void
3435 vmem_walk_fini(mdb_walk_state_t *wsp)
3437 vmem_walk_t *vw = wsp->walk_data;
3438 vmem_node_t *root = vw->vw_root;
3439 int done;
3441 if (root == NULL)
3442 return;
3444 if ((vw->vw_root = root->vn_children) != NULL)
3445 vmem_walk_fini(wsp);
3447 vw->vw_root = root->vn_sibling;
3448 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3449 mdb_free(root, sizeof (vmem_node_t));
3451 if (done) {
3452 mdb_free(vw, sizeof (vmem_walk_t));
3453 } else {
3454 vmem_walk_fini(wsp);
3458 typedef struct vmem_seg_walk {
3459 uint8_t vsw_type;
3460 uintptr_t vsw_start;
3461 uintptr_t vsw_current;
3462 } vmem_seg_walk_t;
3464 /*ARGSUSED*/
3466 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3468 vmem_seg_walk_t *vsw;
3470 if (wsp->walk_addr == NULL) {
3471 mdb_warn("vmem_%s does not support global walks\n", name);
3472 return (WALK_ERR);
3475 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3477 vsw->vsw_type = type;
3478 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3479 vsw->vsw_current = vsw->vsw_start;
3481 return (WALK_NEXT);
3485 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3487 #define VMEM_NONE 0
3490 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3492 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3496 vmem_free_walk_init(mdb_walk_state_t *wsp)
3498 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3502 vmem_span_walk_init(mdb_walk_state_t *wsp)
3504 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3508 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3510 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3514 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3516 vmem_seg_t seg;
3517 vmem_seg_walk_t *vsw = wsp->walk_data;
3518 uintptr_t addr = vsw->vsw_current;
3519 static size_t seg_size = 0;
3520 int rval;
3522 if (!seg_size) {
3523 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3524 mdb_warn("failed to read 'vmem_seg_size'");
3525 seg_size = sizeof (vmem_seg_t);
3529 if (seg_size < sizeof (seg))
3530 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3532 if (mdb_vread(&seg, seg_size, addr) == -1) {
3533 mdb_warn("couldn't read vmem_seg at %p", addr);
3534 return (WALK_ERR);
3537 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3538 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3539 rval = WALK_NEXT;
3540 } else {
3541 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3544 if (vsw->vsw_current == vsw->vsw_start)
3545 return (WALK_DONE);
3547 return (rval);
3550 void
3551 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3553 vmem_seg_walk_t *vsw = wsp->walk_data;
3555 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3558 #define VMEM_NAMEWIDTH 22
3561 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3563 vmem_t v, parent;
3564 vmem_kstat_t *vkp = &v.vm_kstat;
3565 uintptr_t paddr;
3566 int ident = 0;
3567 char c[VMEM_NAMEWIDTH];
3569 if (!(flags & DCMD_ADDRSPEC)) {
3570 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3571 mdb_warn("can't walk vmem");
3572 return (DCMD_ERR);
3574 return (DCMD_OK);
3577 if (DCMD_HDRSPEC(flags))
3578 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3579 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3580 "TOTAL", "SUCCEED", "FAIL");
3582 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3583 mdb_warn("couldn't read vmem at %p", addr);
3584 return (DCMD_ERR);
3587 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3588 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3589 mdb_warn("couldn't trace %p's ancestry", addr);
3590 ident = 0;
3591 break;
3593 paddr = (uintptr_t)parent.vm_source;
3596 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3598 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3599 addr, VMEM_NAMEWIDTH, c,
3600 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3601 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3603 return (DCMD_OK);
3606 void
3607 vmem_seg_help(void)
3609 mdb_printf("%s",
3610 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3611 "\n"
3612 "A vmem_seg_t represents a range of addresses (or arbitrary numbers)\n"
3613 "that make up a single chunk of data. Only ALLOC segments have debugging\n"
3614 "information.\n");
3615 mdb_dec_indent(2);
3616 mdb_printf("%<b>OPTIONS%</b>\n");
3617 mdb_inc_indent(2);
3618 mdb_printf("%s",
3619 " -v Display the full content of the vmem_seg, including its stack trace\n"
3620 " -s report the size of the segment, instead of the end address\n"
3621 " -c caller\n"
3622 " filter out segments without the function/PC in their stack trace\n"
3623 " -e earliest\n"
3624 " filter out segments timestamped before earliest\n"
3625 " -l latest\n"
3626 " filter out segments timestamped after latest\n"
3627 " -m minsize\n"
3628 " filter out segments smaller than minsize\n"
3629 " -M maxsize\n"
3630 " filter out segments larger than maxsize\n"
3631 " -t thread\n"
3632 " filter out segments not involving thread\n"
3633 " -T type\n"
3634 " filter out segments not of type 'type'\n"
3635 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
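/*
 * Illustrative usage of ::vmem_seg (addresses are hypothetical):
 *   > 0x30002de4000::vmem_seg -v      - full display of a single segment
 *   > 0x30002de4000::walk vmem_seg | ::vmem_seg -T ALLOC -m 0x1000
 *                                     - ALLOC segments of at least 0x1000
 */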
3638 /*ARGSUSED*/
3640 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3642 vmem_seg_t vs;
3643 pc_t *stk = vs.vs_stack;
3644 uintptr_t sz;
3645 uint8_t t;
3646 const char *type = NULL;
3647 GElf_Sym sym;
3648 char c[MDB_SYM_NAMLEN];
3649 int no_debug;
3650 int i;
3651 int depth;
3652 uintptr_t laddr, haddr;
3654 uintptr_t caller = NULL, thread = NULL;
3655 uintptr_t minsize = 0, maxsize = 0;
3657 hrtime_t earliest = 0, latest = 0;
3659 uint_t size = 0;
3660 uint_t verbose = 0;
3662 if (!(flags & DCMD_ADDRSPEC))
3663 return (DCMD_USAGE);
3665 if (mdb_getopts(argc, argv,
3666 'c', MDB_OPT_UINTPTR, &caller,
3667 'e', MDB_OPT_UINT64, &earliest,
3668 'l', MDB_OPT_UINT64, &latest,
3669 's', MDB_OPT_SETBITS, TRUE, &size,
3670 'm', MDB_OPT_UINTPTR, &minsize,
3671 'M', MDB_OPT_UINTPTR, &maxsize,
3672 't', MDB_OPT_UINTPTR, &thread,
3673 'T', MDB_OPT_STR, &type,
3674 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3675 NULL) != argc)
3676 return (DCMD_USAGE);
3678 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3679 if (verbose) {
3680 mdb_printf("%16s %4s %16s %16s %16s\n"
3681 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3682 "ADDR", "TYPE", "START", "END", "SIZE",
3683 "", "", "THREAD", "TIMESTAMP", "");
3684 } else {
3685 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3686 "START", size? "SIZE" : "END", "WHO");
3690 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3691 mdb_warn("couldn't read vmem_seg at %p", addr);
3692 return (DCMD_ERR);
3695 if (type != NULL) {
3696 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3697 t = VMEM_ALLOC;
3698 else if (strcmp(type, "FREE") == 0)
3699 t = VMEM_FREE;
3700 else if (strcmp(type, "SPAN") == 0)
3701 t = VMEM_SPAN;
3702 else if (strcmp(type, "ROTR") == 0 ||
3703 strcmp(type, "ROTOR") == 0)
3704 t = VMEM_ROTOR;
3705 else if (strcmp(type, "WLKR") == 0 ||
3706 strcmp(type, "WALKER") == 0)
3707 t = VMEM_WALKER;
3708 else {
3709 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3710 type);
3711 return (DCMD_ERR);
3714 if (vs.vs_type != t)
3715 return (DCMD_OK);
3718 sz = vs.vs_end - vs.vs_start;
3720 if (minsize != 0 && sz < minsize)
3721 return (DCMD_OK);
3723 if (maxsize != 0 && sz > maxsize)
3724 return (DCMD_OK);
3726 t = vs.vs_type;
3727 depth = vs.vs_depth;
3730 * debug info, when present, is only accurate for VMEM_ALLOC segments
3732 no_debug = (t != VMEM_ALLOC) ||
3733 (depth == 0 || depth > VMEM_STACK_DEPTH);
3735 if (no_debug) {
3736 if (caller != NULL || thread != NULL || earliest != 0 ||
3737 latest != 0)
3738 return (DCMD_OK); /* not enough info */
3739 } else {
3740 if (caller != NULL) {
3741 laddr = caller;
3742 haddr = caller + sizeof (caller);
3744 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3745 sizeof (c), &sym) != -1 &&
3746 caller == (uintptr_t)sym.st_value) {
3748 * We were provided an exact symbol value; any
3749 * address in the function is valid.
3751 laddr = (uintptr_t)sym.st_value;
3752 haddr = (uintptr_t)sym.st_value + sym.st_size;
3755 for (i = 0; i < depth; i++)
3756 if (vs.vs_stack[i] >= laddr &&
3757 vs.vs_stack[i] < haddr)
3758 break;
3760 if (i == depth)
3761 return (DCMD_OK);
3764 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3765 return (DCMD_OK);
3767 if (earliest != 0 && vs.vs_timestamp < earliest)
3768 return (DCMD_OK);
3770 if (latest != 0 && vs.vs_timestamp > latest)
3771 return (DCMD_OK);
3774 type = (t == VMEM_ALLOC ? "ALLC" :
3775 t == VMEM_FREE ? "FREE" :
3776 t == VMEM_SPAN ? "SPAN" :
3777 t == VMEM_ROTOR ? "ROTR" :
3778 t == VMEM_WALKER ? "WLKR" :
3779 "????");
3781 if (flags & DCMD_PIPE_OUT) {
3782 mdb_printf("%#lr\n", addr);
3783 return (DCMD_OK);
3786 if (verbose) {
3787 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3788 addr, type, vs.vs_start, vs.vs_end, sz);
3790 if (no_debug)
3791 return (DCMD_OK);
3793 mdb_printf("%16s %4s %16p %16llx\n",
3794 "", "", vs.vs_thread, vs.vs_timestamp);
3796 mdb_inc_indent(17);
3797 for (i = 0; i < depth; i++) {
3798 mdb_printf("%a\n", stk[i]);
3800 mdb_dec_indent(17);
3801 mdb_printf("\n");
3802 } else {
3803 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3804 vs.vs_start, size? sz : vs.vs_end);
3806 if (no_debug) {
3807 mdb_printf("\n");
3808 return (DCMD_OK);
3811 for (i = 0; i < depth; i++) {
3812 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3813 c, sizeof (c), &sym) == -1)
3814 continue;
3815 if (strncmp(c, "vmem_", 5) == 0)
3816 continue;
3817 break;
3819 mdb_printf(" %a\n", stk[i]);
3821 return (DCMD_OK);
3824 typedef struct kmalog_data {
3825 uintptr_t kma_addr;
3826 hrtime_t kma_newest;
3827 } kmalog_data_t;
3829 /*ARGSUSED*/
3830 static int
3831 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3833 char name[KMEM_CACHE_NAMELEN + 1];
3834 hrtime_t delta;
3835 int i, depth;
3836 size_t bufsize;
3838 if (bcp->bc_timestamp == 0)
3839 return (WALK_DONE);
3841 if (kma->kma_newest == 0)
3842 kma->kma_newest = bcp->bc_timestamp;
3844 if (kma->kma_addr) {
3845 if (mdb_vread(&bufsize, sizeof (bufsize),
3846 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3847 mdb_warn(
3848 "failed to read cache_bufsize for cache at %p",
3849 bcp->bc_cache);
3850 return (WALK_ERR);
3853 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3854 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3855 return (WALK_NEXT);
3858 delta = kma->kma_newest - bcp->bc_timestamp;
3859 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3861 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3862 &bcp->bc_cache->cache_name) <= 0)
3863 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3865 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3866 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3868 for (i = 0; i < depth; i++)
3869 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3871 return (WALK_NEXT);
3875 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3877 const char *logname = "kmem_transaction_log";
3878 kmalog_data_t kma;
3880 if (argc > 1)
3881 return (DCMD_USAGE);
3883 kma.kma_newest = 0;
3884 if (flags & DCMD_ADDRSPEC)
3885 kma.kma_addr = addr;
3886 else
3887 kma.kma_addr = NULL;
3889 if (argc > 0) {
3890 if (argv->a_type != MDB_TYPE_STRING)
3891 return (DCMD_USAGE);
3892 if (strcmp(argv->a_un.a_str, "fail") == 0)
3893 logname = "kmem_failure_log";
3894 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3895 logname = "kmem_slab_log";
3896 else
3897 return (DCMD_USAGE);
3900 if (mdb_readvar(&addr, logname) == -1) {
3901 mdb_warn("failed to read %s log header pointer", logname);
3902 return (DCMD_ERR);
3905 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3906 mdb_warn("failed to walk kmem log");
3907 return (DCMD_ERR);
3910 return (DCMD_OK);
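/*
 * Illustrative usage of ::kmalog (the address is hypothetical):
 *   > ::kmalog             - display the kmem transaction log
 *   > ::kmalog fail        - display the allocation-failure log
 *   > ::kmalog slab        - display the slab log (kmem_slab_log)
 *   > 0x70d3860::kmalog    - show only entries for the buffer containing
 *                            the given address
 */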
3914 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3915 * The first piece is a structure which we use to accumulate kmem_cache_t
3916 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3917 * walker; we either add all caches, or ones named explicitly as arguments.
3920 typedef struct kmclist {
3921 const char *kmc_name; /* Name to match (or NULL) */
3922 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3923 int kmc_nelems; /* Num entries in kmc_caches */
3924 int kmc_size; /* Size of kmc_caches array */
3925 } kmclist_t;
3927 static int
3928 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3930 void *p;
3931 int s;
3933 if (kmc->kmc_name == NULL ||
3934 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3936 * If we have a match, grow our array (if necessary), and then
3937 * add the virtual address of the matching cache to our list.
3939 if (kmc->kmc_nelems >= kmc->kmc_size) {
3940 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3941 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3943 bcopy(kmc->kmc_caches, p,
3944 sizeof (uintptr_t) * kmc->kmc_size);
3946 kmc->kmc_caches = p;
3947 kmc->kmc_size = s;
3950 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3951 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3954 return (WALK_NEXT);
3958 * The second piece of ::kmausers is a hash table of allocations. Each
3959 * allocation owner is identified by its stack trace and data_size. We then
3960 * track the total bytes of all such allocations, and the number of allocations
3961 * to report at the end. Once we have a list of caches, we walk through the
3962 * allocated bufctls of each, and update our hash table accordingly.
3965 typedef struct kmowner {
3966 struct kmowner *kmo_head; /* First hash elt in bucket */
3967 struct kmowner *kmo_next; /* Next hash elt in chain */
3968 size_t kmo_signature; /* Hash table signature */
3969 uint_t kmo_num; /* Number of allocations */
3970 size_t kmo_data_size; /* Size of each allocation */
3971 size_t kmo_total_size; /* Total bytes of allocation */
3972 int kmo_depth; /* Depth of stack trace */
3973 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
3974 } kmowner_t;
3976 typedef struct kmusers {
3977 uintptr_t kmu_addr; /* address of interest */
3978 const kmem_cache_t *kmu_cache; /* Current kmem cache */
3979 kmowner_t *kmu_hash; /* Hash table of owners */
3980 int kmu_nelems; /* Number of entries in use */
3981 int kmu_size; /* Total number of entries */
3982 } kmusers_t;
3984 static void
3985 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3986 size_t size, size_t data_size)
3988 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3989 size_t bucket, signature = data_size;
3990 kmowner_t *kmo, *kmoend;
3993 * If the hash table is full, double its size and rehash everything.
3995 if (kmu->kmu_nelems >= kmu->kmu_size) {
3996 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3998 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3999 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4000 kmu->kmu_hash = kmo;
4001 kmu->kmu_size = s;
4003 kmoend = kmu->kmu_hash + kmu->kmu_size;
4004 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4005 kmo->kmo_head = NULL;
4007 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4008 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4009 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4010 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4011 kmu->kmu_hash[bucket].kmo_head = kmo;
4016 * Finish computing the hash signature from the stack trace, and then
4017 * see if the owner is in the hash table. If so, update our stats.
4019 for (i = 0; i < depth; i++)
4020 signature += bcp->bc_stack[i];
4022 bucket = signature & (kmu->kmu_size - 1);
4024 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4025 if (kmo->kmo_signature == signature) {
4026 size_t difference = 0;
4028 difference |= kmo->kmo_data_size - data_size;
4029 difference |= kmo->kmo_depth - depth;
4031 for (i = 0; i < depth; i++) {
4032 difference |= kmo->kmo_stack[i] -
4033 bcp->bc_stack[i];
4036 if (difference == 0) {
4037 kmo->kmo_total_size += size;
4038 kmo->kmo_num++;
4039 return;
4045 * If the owner is not yet hashed, grab the next element and fill it
4046 * in based on the allocation information.
4048 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4049 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4050 kmu->kmu_hash[bucket].kmo_head = kmo;
4052 kmo->kmo_signature = signature;
4053 kmo->kmo_num = 1;
4054 kmo->kmo_data_size = data_size;
4055 kmo->kmo_total_size = size;
4056 kmo->kmo_depth = depth;
4058 for (i = 0; i < depth; i++)
4059 kmo->kmo_stack[i] = bcp->bc_stack[i];
4063 * When ::kmausers is invoked without the -f flag, we simply update our hash
4064 * table with the information from each allocated bufctl.
4066 /*ARGSUSED*/
4067 static int
4068 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4070 const kmem_cache_t *cp = kmu->kmu_cache;
4072 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4073 return (WALK_NEXT);
4077 * When ::kmausers is invoked with the -f flag, we print out the information
4078 * for each bufctl as well as updating the hash table.
4080 static int
4081 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4083 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4084 const kmem_cache_t *cp = kmu->kmu_cache;
4085 kmem_bufctl_t bufctl;
4087 if (kmu->kmu_addr) {
4088 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4089 mdb_warn("couldn't read bufctl at %p", addr);
4090 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4091 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4092 cp->cache_bufsize)
4093 return (WALK_NEXT);
4096 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4097 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4099 for (i = 0; i < depth; i++)
4100 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4102 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4103 return (WALK_NEXT);
4107 * We sort our results by allocation size before printing them.
4109 static int
4110 kmownercmp(const void *lp, const void *rp)
4112 const kmowner_t *lhs = lp;
4113 const kmowner_t *rhs = rp;
4115 return (rhs->kmo_total_size - lhs->kmo_total_size);
4119 * The main engine of ::kmausers is relatively straightforward: First we
4120 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4121 * iterate over the allocated bufctls of each cache in the list. Finally,
4122 * we sort and print our results.
4124 /*ARGSUSED*/
4126 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4128 int mem_threshold = 8192; /* Minimum # bytes for printing */
4129 int cnt_threshold = 100; /* Minimum # blocks for printing */
4130 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4131 int do_all_caches = 1; /* Do all caches (no arguments) */
4132 int opt_e = FALSE; /* Include "small" users */
4133 int opt_f = FALSE; /* Print stack traces */
4135 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4136 kmowner_t *kmo, *kmoend;
4137 int i, oelems;
4139 kmclist_t kmc;
4140 kmusers_t kmu;
4142 bzero(&kmc, sizeof (kmc));
4143 bzero(&kmu, sizeof (kmu));
4145 while ((i = mdb_getopts(argc, argv,
4146 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4147 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4149 argv += i; /* skip past options we just processed */
4150 argc -= i; /* adjust argc */
4152 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4153 return (DCMD_USAGE);
4155 oelems = kmc.kmc_nelems;
4156 kmc.kmc_name = argv->a_un.a_str;
4157 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4159 if (kmc.kmc_nelems == oelems) {
4160 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4161 return (DCMD_ERR);
4164 do_all_caches = 0;
4165 argv++;
4166 argc--;
4169 if (flags & DCMD_ADDRSPEC) {
4170 opt_f = TRUE;
4171 kmu.kmu_addr = addr;
4172 } else {
4173 kmu.kmu_addr = NULL;
4176 if (opt_e)
4177 mem_threshold = cnt_threshold = 0;
4179 if (opt_f)
4180 callback = (mdb_walk_cb_t)kmause2;
4182 if (do_all_caches) {
4183 kmc.kmc_name = NULL; /* match all cache names */
4184 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4187 for (i = 0; i < kmc.kmc_nelems; i++) {
4188 uintptr_t cp = kmc.kmc_caches[i];
4189 kmem_cache_t c;
4191 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4192 mdb_warn("failed to read cache at %p", cp);
4193 continue;
4196 if (!(c.cache_flags & KMF_AUDIT)) {
4197 if (!do_all_caches) {
4198 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4199 c.cache_name);
4201 continue;
4204 kmu.kmu_cache = &c;
4205 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4206 audited_caches++;
4209 if (audited_caches == 0 && do_all_caches) {
4210 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4211 return (DCMD_ERR);
4214 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4215 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4217 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4218 if (kmo->kmo_total_size < mem_threshold &&
4219 kmo->kmo_num < cnt_threshold)
4220 continue;
4221 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4222 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4223 for (i = 0; i < kmo->kmo_depth; i++)
4224 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4227 return (DCMD_OK);
4230 void
4231 kmausers_help(void)
4233 mdb_printf(
4234 "Displays the largest users of the kmem allocator, sorted by total\n"
4235 "allocation size. If one or more caches are specified, only those caches\n"
4236 "will be searched. By default, all caches are searched. If an\n"
4237 "address is specified, then only those allocations which include\n"
4238 "the given address are displayed. Specifying an address implies\n"
4239 "-f.\n"
4240 "\n"
4241 "\t-e\tInclude all users, not just the largest\n"
4242 "\t-f\tDisplay individual allocations. By default, users are\n"
4243 "\t\tgrouped by stack\n");
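/*
 * Illustrative usage of ::kmausers (cache names are only examples):
 *   > ::kmausers                      - summarize the largest users
 *   > ::kmausers -f process_cache     - show each allocation individually
 *   > ::kmausers -e kmem_alloc_1152   - include "small" users as well
 */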
4246 static int
4247 kmem_ready_check(void)
4249 int ready;
4251 if (mdb_readvar(&ready, "kmem_ready") < 0)
4252 return (-1); /* errno is set for us */
4254 return (ready);
4257 void
4258 kmem_statechange(void)
4260 static int been_ready = 0;
4262 if (been_ready)
4263 return;
4265 if (kmem_ready_check() <= 0)
4266 return;
4268 been_ready = 1;
4269 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4272 void
4273 kmem_init(void)
4275 mdb_walker_t w = {
4276 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4277 list_walk_step, list_walk_fini
4281 * If kmem is ready, we'll need to invoke the kmem_cache walker
4282 * immediately. Walkers in the linkage structure won't be ready until
4283 * _mdb_init returns, so we'll need to add this one manually. If kmem
4284 * is ready, we'll use the walker to initialize the caches. If kmem
4285 * isn't ready, we'll register a callback that will allow us to defer
4286 * cache walking until it is.
4288 if (mdb_add_walker(&w) != 0) {
4289 mdb_warn("failed to add kmem_cache walker");
4290 return;
4293 kmem_statechange();
4295 /* register our ::whatis handlers */
4296 mdb_whatis_register("modules", whatis_run_modules, NULL,
4297 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4298 mdb_whatis_register("threads", whatis_run_threads, NULL,
4299 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4300 mdb_whatis_register("pages", whatis_run_pages, NULL,
4301 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4302 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4303 WHATIS_PRIO_ALLOCATOR, 0);
4304 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4305 WHATIS_PRIO_ALLOCATOR, 0);
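/*
 * With the handlers registered above, "> 0x70d3860::whatis -v" (the
 * address is hypothetical) gives the module, thread, page, kmem and
 * vmem handlers each a chance to identify the address.
 */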
4308 typedef struct whatthread {
4309 uintptr_t wt_target;
4310 int wt_verbose;
4311 } whatthread_t;
4313 static int
4314 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4316 uintptr_t current, data;
4318 if (t->t_stkbase == NULL)
4319 return (WALK_NEXT);
4322 * Warn about swapped out threads, but drive on anyway
4324 if (!(t->t_schedflag & TS_LOAD)) {
4325 mdb_warn("thread %p's stack swapped out\n", addr);
4326 return (WALK_NEXT);
4330 * Search the thread's stack for the given pointer. Note that it would
4331 * be more efficient to follow ::kgrep's lead and read in page-sized
4332 * chunks, but this routine is already fast and simple.
4334 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4335 current += sizeof (uintptr_t)) {
4336 if (mdb_vread(&data, sizeof (data), current) == -1) {
4337 mdb_warn("couldn't read thread %p's stack at %p",
4338 addr, current);
4339 return (WALK_ERR);
4342 if (data == w->wt_target) {
4343 if (w->wt_verbose) {
4344 mdb_printf("%p in thread %p's stack%s\n",
4345 current, addr, stack_active(t, current));
4346 } else {
4347 mdb_printf("%#lr\n", addr);
4348 return (WALK_NEXT);
4353 return (WALK_NEXT);
4357 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4359 whatthread_t w;
4361 if (!(flags & DCMD_ADDRSPEC))
4362 return (DCMD_USAGE);
4364 w.wt_verbose = FALSE;
4365 w.wt_target = addr;
4367 if (mdb_getopts(argc, argv,
4368 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4369 return (DCMD_USAGE);
4371 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4372 == -1) {
4373 mdb_warn("couldn't walk threads");
4374 return (DCMD_ERR);
4377 return (DCMD_OK);
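/*
 * Illustrative usage of ::whatthread (the address is hypothetical):
 *   > 0x70d3860::whatthread      - print each thread whose stack contains
 *                                  a pointer to the given address
 *   > 0x70d3860::whatthread -v   - also show where in each stack the
 *                                  pointer was found
 */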