2 * Copyright (c) 1987, 1991, 1993
3 * The Regents of the University of California.
4 * Copyright (c) 2005-2009 Robert N. M. Watson
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
35 * Kernel malloc(9) implementation -- general purpose kernel memory allocator
36 * based on memory types. Back end is implemented using the UMA(9) zone
37 * allocator. A set of fixed-size buckets are used for smaller allocations,
38 * and a special UMA allocation interface is used for larger allocations.
39 * Callers declare memory types, and statistics are maintained independently
40 * for each memory type. Statistics are maintained per-CPU for performance
41 * reasons. See malloc(9) and comments in malloc.h for a detailed
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD$");
51 #include <sys/param.h>
52 #include <sys/systm.h>
54 #include <sys/kernel.h>
56 #include <sys/malloc.h>
57 #include <sys/mutex.h>
58 #include <sys/vmmeter.h>
61 #include <sys/sysctl.h>
67 #include <vm/vm_pageout.h>
68 #include <vm/vm_param.h>
69 #include <vm/vm_kern.h>
70 #include <vm/vm_extern.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_page.h>
74 #include <vm/uma_int.h>
75 #include <vm/uma_dbg.h>
78 #include <vm/memguard.h>
81 #include <vm/redzone.h>
84 #if defined(INVARIANTS) && defined(__i386__)
85 #include <machine/cpu.h>
91 #include <sys/dtrace_bsd.h>
93 dtrace_malloc_probe_func_t dtrace_malloc_probe
;
97 * When realloc() is called, if the new size is sufficiently smaller than
98 * the old size, realloc() will allocate a new, smaller block to avoid
99 * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
100 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
102 #ifndef REALLOC_FRACTION
103 #define REALLOC_FRACTION 1 /* new block if <= half the size */
107 * Centrally define some common malloc types.
109 MALLOC_DEFINE(M_CACHE
, "cache", "Various Dynamically allocated caches");
110 MALLOC_DEFINE(M_DEVBUF
, "devbuf", "device driver memory");
111 MALLOC_DEFINE(M_TEMP
, "temp", "misc temporary data buffers");
113 MALLOC_DEFINE(M_IP6OPT
, "ip6opt", "IPv6 options");
114 MALLOC_DEFINE(M_IP6NDP
, "ip6ndp", "IPv6 Neighbor Discovery");
116 static struct malloc_type
*kmemstatistics
;
117 static int kmemcount
;
119 #define KMEM_ZSHIFT 4
120 #define KMEM_ZBASE 16
121 #define KMEM_ZMASK (KMEM_ZBASE - 1)
123 #define KMEM_ZMAX 65536
124 #define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT)
125 static uint8_t kmemsize
[KMEM_ZSIZE
+ 1];
127 #ifndef MALLOC_DEBUG_MAXZONES
128 #define MALLOC_DEBUG_MAXZONES 1
130 static int numzones
= MALLOC_DEBUG_MAXZONES
;
133 * Small malloc(9) memory allocations are allocated from a set of UMA buckets
136 * XXX: The comment here used to read "These won't be powers of two for
137 * long." It's possible that a significant amount of wasted memory could be
138 * recovered by tuning the sizes of these buckets.
143 uma_zone_t kz_zone
[MALLOC_DEBUG_MAXZONES
];
162 * Zone to allocate malloc type descriptions from. For ABI reasons, memory
163 * types are described by a data structure passed by the declaring code, but
164 * the malloc(9) implementation has its own data structure describing the
165 * type and statistics. This permits the malloc(9)-internal data structures
166 * to be modified without breaking binary-compiled kernel modules that
167 * declare malloc types.
169 static uma_zone_t mt_zone
;
172 SYSCTL_ULONG(_vm
, OID_AUTO
, kmem_size
, CTLFLAG_RDTUN
, &vm_kmem_size
, 0,
173 "Size of kernel memory");
175 static u_long kmem_zmax
= KMEM_ZMAX
;
176 SYSCTL_ULONG(_vm
, OID_AUTO
, kmem_zmax
, CTLFLAG_RDTUN
, &kmem_zmax
, 0,
177 "Maximum allocation size that malloc(9) would use UMA as backend");
179 static u_long vm_kmem_size_min
;
180 SYSCTL_ULONG(_vm
, OID_AUTO
, kmem_size_min
, CTLFLAG_RDTUN
, &vm_kmem_size_min
, 0,
181 "Minimum size of kernel memory");
183 static u_long vm_kmem_size_max
;
184 SYSCTL_ULONG(_vm
, OID_AUTO
, kmem_size_max
, CTLFLAG_RDTUN
, &vm_kmem_size_max
, 0,
185 "Maximum size of kernel memory");
187 static u_int vm_kmem_size_scale
;
188 SYSCTL_UINT(_vm
, OID_AUTO
, kmem_size_scale
, CTLFLAG_RDTUN
, &vm_kmem_size_scale
, 0,
189 "Scale factor for kernel memory size");
191 static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS
);
192 SYSCTL_PROC(_vm
, OID_AUTO
, kmem_map_size
,
193 CTLFLAG_RD
| CTLTYPE_ULONG
| CTLFLAG_MPSAFE
, NULL
, 0,
194 sysctl_kmem_map_size
, "LU", "Current kmem allocation size");
196 static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS
);
197 SYSCTL_PROC(_vm
, OID_AUTO
, kmem_map_free
,
198 CTLFLAG_RD
| CTLTYPE_ULONG
| CTLFLAG_MPSAFE
, NULL
, 0,
199 sysctl_kmem_map_free
, "LU", "Free space in kmem");
202 * The malloc_mtx protects the kmemstatistics linked list.
204 struct mtx malloc_mtx
;
206 #ifdef MALLOC_PROFILE
207 uint64_t krequests
[KMEM_ZSIZE
+ 1];
209 static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS
);
212 static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS
);
215 * time_uptime of the last malloc(9) failure (induced or real).
217 static time_t t_malloc_fail
;
219 #if defined(MALLOC_MAKE_FAILURES) || (MALLOC_DEBUG_MAXZONES > 1)
220 static SYSCTL_NODE(_debug
, OID_AUTO
, malloc
, CTLFLAG_RD
, 0,
221 "Kernel malloc debugging options");
225 * malloc(9) fault injection -- cause malloc failures every (n) mallocs when
226 * the caller specifies M_NOWAIT. If set to 0, no failures are caused.
228 #ifdef MALLOC_MAKE_FAILURES
229 static int malloc_failure_rate
;
230 static int malloc_nowait_count
;
231 static int malloc_failure_count
;
232 SYSCTL_INT(_debug_malloc
, OID_AUTO
, failure_rate
, CTLFLAG_RWTUN
,
233 &malloc_failure_rate
, 0, "Every (n) mallocs with M_NOWAIT will fail");
234 SYSCTL_INT(_debug_malloc
, OID_AUTO
, failure_count
, CTLFLAG_RD
,
235 &malloc_failure_count
, 0, "Number of imposed M_NOWAIT malloc failures");
239 sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS
)
243 size
= vmem_size(kmem_arena
, VMEM_ALLOC
);
244 return (sysctl_handle_long(oidp
, &size
, 0, req
));
248 sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS
)
252 size
= vmem_size(kmem_arena
, VMEM_FREE
);
253 return (sysctl_handle_long(oidp
, &size
, 0, req
));
257 * malloc(9) uma zone separation -- sub-page buffer overruns in one
258 * malloc type will affect only a subset of other malloc types.
260 #if MALLOC_DEBUG_MAXZONES > 1
262 tunable_set_numzones(void)
265 TUNABLE_INT_FETCH("debug.malloc.numzones",
268 /* Sanity check the number of malloc uma zones. */
271 if (numzones
> MALLOC_DEBUG_MAXZONES
)
272 numzones
= MALLOC_DEBUG_MAXZONES
;
274 SYSINIT(numzones
, SI_SUB_TUNABLES
, SI_ORDER_ANY
, tunable_set_numzones
, NULL
);
275 SYSCTL_INT(_debug_malloc
, OID_AUTO
, numzones
, CTLFLAG_RDTUN
| CTLFLAG_NOFETCH
,
276 &numzones
, 0, "Number of malloc uma subzones");
279 * Any number that changes regularly is an okay choice for the
280 * offset. Build numbers are pretty good if you have them.
282 static u_int zone_offset
= __FreeBSD_version
;
283 TUNABLE_INT("debug.malloc.zone_offset", &zone_offset
);
284 SYSCTL_UINT(_debug_malloc
, OID_AUTO
, zone_offset
, CTLFLAG_RDTUN
,
285 &zone_offset
, 0, "Separate malloc types by examining the "
286 "Nth character in the malloc type short description.");
289 mtp_get_subzone(const char *desc
)
294 if (desc
== NULL
|| (len
= strlen(desc
)) == 0)
296 val
= desc
[zone_offset
% len
];
297 return (val
% numzones
);
299 #elif MALLOC_DEBUG_MAXZONES == 0
300 #error "MALLOC_DEBUG_MAXZONES must be positive."
303 mtp_get_subzone(const char *desc
)
308 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
311 malloc_last_fail(void)
314 return (time_uptime
- t_malloc_fail
);
318 * An allocation has succeeded -- update malloc type statistics for the
319 * amount of the bucket size. Occurs within a critical section so that the
320 * thread isn't preempted and doesn't migrate while updating per-CPU
324 malloc_type_zone_allocated(struct malloc_type
*mtp
, unsigned long size
,
327 struct malloc_type_internal
*mtip
;
328 struct malloc_type_stats
*mtsp
;
331 mtip
= mtp
->ks_handle
;
332 mtsp
= &mtip
->mti_stats
[curcpu
];
334 mtsp
->mts_memalloced
+= size
;
335 mtsp
->mts_numallocs
++;
338 mtsp
->mts_size
|= 1 << zindx
;
341 if (dtrace_malloc_probe
!= NULL
) {
342 uint32_t probe_id
= mtip
->mti_probes
[DTMALLOC_PROBE_MALLOC
];
344 (dtrace_malloc_probe
)(probe_id
,
345 (uintptr_t) mtp
, (uintptr_t) mtip
,
346 (uintptr_t) mtsp
, size
, zindx
);
/*
 * Account an allocation that did not come from a bucket zone (zone index
 * -1).  Zero-sized updates are ignored.
 */
void
malloc_type_allocated(struct malloc_type *mtp, unsigned long size)
{

	if (size == 0)
		return;
	malloc_type_zone_allocated(mtp, size, -1);
}
362 * A free operation has occurred -- update malloc type statistics for the
363 * amount of the bucket size. Occurs within a critical section so that the
364 * thread isn't preempted and doesn't migrate while updating per-CPU
368 malloc_type_freed(struct malloc_type
*mtp
, unsigned long size
)
370 struct malloc_type_internal
*mtip
;
371 struct malloc_type_stats
*mtsp
;
374 mtip
= mtp
->ks_handle
;
375 mtsp
= &mtip
->mti_stats
[curcpu
];
376 mtsp
->mts_memfreed
+= size
;
377 mtsp
->mts_numfrees
++;
380 if (dtrace_malloc_probe
!= NULL
) {
381 uint32_t probe_id
= mtip
->mti_probes
[DTMALLOC_PROBE_FREE
];
383 (dtrace_malloc_probe
)(probe_id
,
384 (uintptr_t) mtp
, (uintptr_t) mtip
,
385 (uintptr_t) mtsp
, size
, 0);
395 * Allocate a block of physically contiguous memory.
397 * If M_NOWAIT is set, this routine will not block and return NULL if
398 * the allocation fails.
401 contigmalloc(unsigned long size
, struct malloc_type
*type
, int flags
,
402 vm_paddr_t low
, vm_paddr_t high
, unsigned long alignment
,
407 ret
= (void *)kmem_alloc_contig(kernel_arena
, size
, flags
, low
, high
,
408 alignment
, boundary
, VM_MEMATTR_DEFAULT
);
410 malloc_type_allocated(type
, round_page(size
));
417 * Free a block of memory allocated by contigmalloc.
419 * This routine may not block.
422 contigfree(void *addr
, unsigned long size
, struct malloc_type
*type
)
425 kmem_free(kernel_arena
, (vm_offset_t
)addr
, size
);
426 malloc_type_freed(type
, round_page(size
));
432 * Allocate a block of memory.
434 * If M_NOWAIT is set, this routine will not block and return NULL if
435 * the allocation fails.
438 malloc(unsigned long size
, struct malloc_type
*mtp
, int flags
)
441 struct malloc_type_internal
*mtip
;
444 #if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE)
445 unsigned long osize
= size
;
449 KASSERT(mtp
->ks_magic
== M_MAGIC
, ("malloc: bad malloc type magic"));
451 * Check that exactly one of M_WAITOK or M_NOWAIT is specified.
453 indx
= flags
& (M_WAITOK
| M_NOWAIT
);
454 if (indx
!= M_NOWAIT
&& indx
!= M_WAITOK
) {
455 static struct timeval lasterr
;
456 static int curerr
, once
;
457 if (once
== 0 && ppsratecheck(&lasterr
, &curerr
, 1)) {
458 printf("Bad malloc flags: %x\n", indx
);
465 #ifdef MALLOC_MAKE_FAILURES
466 if ((flags
& M_NOWAIT
) && (malloc_failure_rate
!= 0)) {
467 atomic_add_int(&malloc_nowait_count
, 1);
468 if ((malloc_nowait_count
% malloc_failure_rate
) == 0) {
469 atomic_add_int(&malloc_failure_count
, 1);
470 t_malloc_fail
= time_uptime
;
475 if (flags
& M_WAITOK
)
476 KASSERT(curthread
->td_intr_nesting_level
== 0,
477 ("malloc(M_WAITOK) in interrupt context"));
478 KASSERT(curthread
->td_critnest
== 0 || SCHEDULER_STOPPED(),
479 ("malloc: called with spinlock or critical section held"));
481 #ifdef DEBUG_MEMGUARD
482 if (memguard_cmp_mtp(mtp
, size
)) {
483 va
= memguard_alloc(size
, flags
);
486 /* This is unfortunate but should not be fatal. */
491 size
= redzone_size_ntor(size
);
494 if (size
<= kmem_zmax
) {
495 mtip
= mtp
->ks_handle
;
496 if (size
& KMEM_ZMASK
)
497 size
= (size
& ~KMEM_ZMASK
) + KMEM_ZBASE
;
498 indx
= kmemsize
[size
>> KMEM_ZSHIFT
];
499 KASSERT(mtip
->mti_zone
< numzones
,
500 ("mti_zone %u out of range %d",
501 mtip
->mti_zone
, numzones
));
502 zone
= kmemzones
[indx
].kz_zone
[mtip
->mti_zone
];
503 #ifdef MALLOC_PROFILE
504 krequests
[size
>> KMEM_ZSHIFT
]++;
506 va
= uma_zalloc(zone
, flags
);
508 size
= zone
->uz_size
;
509 malloc_type_zone_allocated(mtp
, va
== NULL
? 0 : size
, indx
);
511 size
= roundup(size
, PAGE_SIZE
);
513 va
= uma_large_malloc(size
, flags
);
514 malloc_type_allocated(mtp
, va
== NULL
? 0 : size
);
516 if (flags
& M_WAITOK
)
517 KASSERT(va
!= NULL
, ("malloc(M_WAITOK) returned NULL"));
519 t_malloc_fail
= time_uptime
;
521 if (va
!= NULL
&& !(flags
& M_ZERO
)) {
522 memset(va
, 0x70, osize
);
527 va
= redzone_setup(va
, osize
);
529 return ((void *) va
);
535 * Free a block of memory allocated by malloc.
537 * This routine may not block.
540 free(void *addr
, struct malloc_type
*mtp
)
545 KASSERT(mtp
->ks_magic
== M_MAGIC
, ("free: bad malloc type magic"));
546 KASSERT(curthread
->td_critnest
== 0 || SCHEDULER_STOPPED(),
547 ("free: called with spinlock or critical section held"));
549 /* free(NULL, ...) does nothing */
553 #ifdef DEBUG_MEMGUARD
554 if (is_memguard_addr(addr
)) {
562 addr
= redzone_addr_ntor(addr
);
565 slab
= vtoslab((vm_offset_t
)addr
& (~UMA_SLAB_MASK
));
568 panic("free: address %p(%p) has not been allocated.\n",
569 addr
, (void *)((u_long
)addr
& (~UMA_SLAB_MASK
)));
571 if (!(slab
->us_flags
& UMA_SLAB_MALLOC
)) {
573 struct malloc_type
**mtpp
= addr
;
575 size
= slab
->us_keg
->uk_size
;
578 * Cache a pointer to the malloc_type that most recently freed
579 * this memory here. This way we know who is most likely to
580 * have stepped on it later.
582 * This code assumes that size is a multiple of 8 bytes for
585 mtpp
= (struct malloc_type
**)
586 ((unsigned long)mtpp
& ~UMA_ALIGN_PTR
);
587 mtpp
+= (size
- sizeof(struct malloc_type
*)) /
588 sizeof(struct malloc_type
*);
591 uma_zfree_arg(LIST_FIRST(&slab
->us_keg
->uk_zones
), addr
, slab
);
593 size
= slab
->us_size
;
594 uma_large_free(slab
);
596 malloc_type_freed(mtp
, size
);
600 * realloc: change the size of a memory block
603 realloc(void *addr
, unsigned long size
, struct malloc_type
*mtp
, int flags
)
609 KASSERT(mtp
->ks_magic
== M_MAGIC
,
610 ("realloc: bad malloc type magic"));
611 KASSERT(curthread
->td_critnest
== 0 || SCHEDULER_STOPPED(),
612 ("realloc: called with spinlock or critical section held"));
614 /* realloc(NULL, ...) is equivalent to malloc(...) */
616 return (malloc(size
, mtp
, flags
));
619 * XXX: Should report free of old memory and alloc of new memory to
623 #ifdef DEBUG_MEMGUARD
624 if (is_memguard_addr(addr
))
625 return (memguard_realloc(addr
, size
, mtp
, flags
));
630 alloc
= redzone_get_size(addr
);
632 slab
= vtoslab((vm_offset_t
)addr
& ~(UMA_SLAB_MASK
));
635 KASSERT(slab
!= NULL
,
636 ("realloc: address %p out of range", (void *)addr
));
638 /* Get the size of the original block */
639 if (!(slab
->us_flags
& UMA_SLAB_MALLOC
))
640 alloc
= slab
->us_keg
->uk_size
;
642 alloc
= slab
->us_size
;
644 /* Reuse the original block if appropriate */
646 && (size
> (alloc
>> REALLOC_FRACTION
) || alloc
== MINALLOCSIZE
))
648 #endif /* !DEBUG_REDZONE */
650 /* Allocate a new, bigger (or smaller) block */
651 if ((newaddr
= malloc(size
, mtp
, flags
)) == NULL
)
654 /* Copy over original contents */
655 bcopy(addr
, newaddr
, min(size
, alloc
));
661 * reallocf: same as realloc() but free memory on failure.
664 reallocf(void *addr
, unsigned long size
, struct malloc_type
*mtp
, int flags
)
668 if ((mem
= realloc(addr
, size
, mtp
, flags
)) == NULL
)
674 * Wake the uma reclamation pagedaemon thread when we exhaust KVA. It
675 * will call the lowmem handler and uma_reclaim() callbacks in a
676 * context that is safe.
679 kmem_reclaim(vmem_t
*vm
, int flags
)
682 uma_reclaim_wakeup();
687 CTASSERT(VM_KMEM_SIZE_SCALE
>= 1);
691 * Initialize the kernel memory (kmem) arena.
700 if (vm_kmem_size
== 0)
701 vm_kmem_size
= VM_KMEM_SIZE
;
703 #ifdef VM_KMEM_SIZE_MIN
704 if (vm_kmem_size_min
== 0)
705 vm_kmem_size_min
= VM_KMEM_SIZE_MIN
;
707 #ifdef VM_KMEM_SIZE_MAX
708 if (vm_kmem_size_max
== 0)
709 vm_kmem_size_max
= VM_KMEM_SIZE_MAX
;
712 * Calculate the amount of kernel virtual address (KVA) space that is
713 * preallocated to the kmem arena. In order to support a wide range
714 * of machines, it is a function of the physical memory size,
717 * min(max(physical memory size / VM_KMEM_SIZE_SCALE,
718 * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX)
720 * Every architecture must define an integral value for
721 * VM_KMEM_SIZE_SCALE. However, the definitions of VM_KMEM_SIZE_MIN
722 * and VM_KMEM_SIZE_MAX, which represent respectively the floor and
723 * ceiling on this preallocation, are optional. Typically,
724 * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on
725 * a given architecture.
727 mem_size
= vm_cnt
.v_page_count
;
728 if (mem_size
<= 32768) /* delphij XXX 128MB */
729 kmem_zmax
= PAGE_SIZE
;
731 if (vm_kmem_size_scale
< 1)
732 vm_kmem_size_scale
= VM_KMEM_SIZE_SCALE
;
735 * Check if we should use defaults for the "vm_kmem_size"
738 if (vm_kmem_size
== 0) {
739 vm_kmem_size
= (mem_size
/ vm_kmem_size_scale
) * PAGE_SIZE
;
741 if (vm_kmem_size_min
> 0 && vm_kmem_size
< vm_kmem_size_min
)
742 vm_kmem_size
= vm_kmem_size_min
;
743 if (vm_kmem_size_max
> 0 && vm_kmem_size
>= vm_kmem_size_max
)
744 vm_kmem_size
= vm_kmem_size_max
;
748 * The amount of KVA space that is preallocated to the
749 * kmem arena can be set statically at compile-time or manually
750 * through the kernel environment. However, it is still limited to
751 * twice the physical memory size, which has been sufficient to handle
752 * the most severe cases of external fragmentation in the kmem arena.
754 if (vm_kmem_size
/ 2 / PAGE_SIZE
> mem_size
)
755 vm_kmem_size
= 2 * mem_size
* PAGE_SIZE
;
757 vm_kmem_size
= round_page(vm_kmem_size
);
758 #ifdef DEBUG_MEMGUARD
759 tmp
= memguard_fudge(vm_kmem_size
, kernel_map
);
763 vmem_init(kmem_arena
, "kmem arena", kva_alloc(tmp
), tmp
, PAGE_SIZE
,
765 vmem_set_reclaim(kmem_arena
, kmem_reclaim
);
767 #ifdef DEBUG_MEMGUARD
769 * Initialize MemGuard if support compiled in. MemGuard is a
770 * replacement allocator used for detecting tamper-after-free
771 * scenarios as they occur. It is only used for debugging.
773 memguard_init(kmem_arena
);
778 * Initialize the kernel memory allocator
782 mallocinit(void *dummy
)
787 mtx_init(&malloc_mtx
, "malloc", NULL
, MTX_DEF
);
793 if (kmem_zmax
< PAGE_SIZE
|| kmem_zmax
> KMEM_ZMAX
)
794 kmem_zmax
= KMEM_ZMAX
;
796 mt_zone
= uma_zcreate("mt_zone", sizeof(struct malloc_type_internal
),
798 mtrash_ctor
, mtrash_dtor
, mtrash_init
, mtrash_fini
,
800 NULL
, NULL
, NULL
, NULL
,
802 UMA_ALIGN_PTR
, UMA_ZONE_MALLOC
);
803 for (i
= 0, indx
= 0; kmemzones
[indx
].kz_size
!= 0; indx
++) {
804 int size
= kmemzones
[indx
].kz_size
;
805 char *name
= kmemzones
[indx
].kz_name
;
808 for (subzone
= 0; subzone
< numzones
; subzone
++) {
809 kmemzones
[indx
].kz_zone
[subzone
] =
810 uma_zcreate(name
, size
,
812 mtrash_ctor
, mtrash_dtor
, mtrash_init
, mtrash_fini
,
814 NULL
, NULL
, NULL
, NULL
,
816 UMA_ALIGN_PTR
, UMA_ZONE_MALLOC
);
818 for (;i
<= size
; i
+= KMEM_ZBASE
)
819 kmemsize
[i
>> KMEM_ZSHIFT
] = indx
;
823 SYSINIT(kmem
, SI_SUB_KMEM
, SI_ORDER_SECOND
, mallocinit
, NULL
);
826 malloc_init(void *data
)
828 struct malloc_type_internal
*mtip
;
829 struct malloc_type
*mtp
;
831 KASSERT(vm_cnt
.v_page_count
!= 0, ("malloc_register before vm_init"));
834 if (mtp
->ks_magic
!= M_MAGIC
)
835 panic("malloc_init: bad malloc type magic");
837 mtip
= uma_zalloc(mt_zone
, M_WAITOK
| M_ZERO
);
838 mtp
->ks_handle
= mtip
;
839 mtip
->mti_zone
= mtp_get_subzone(mtp
->ks_shortdesc
);
841 mtx_lock(&malloc_mtx
);
842 mtp
->ks_next
= kmemstatistics
;
843 kmemstatistics
= mtp
;
845 mtx_unlock(&malloc_mtx
);
849 malloc_uninit(void *data
)
851 struct malloc_type_internal
*mtip
;
852 struct malloc_type_stats
*mtsp
;
853 struct malloc_type
*mtp
, *temp
;
855 long temp_allocs
, temp_bytes
;
859 KASSERT(mtp
->ks_magic
== M_MAGIC
,
860 ("malloc_uninit: bad malloc type magic"));
861 KASSERT(mtp
->ks_handle
!= NULL
, ("malloc_deregister: cookie NULL"));
863 mtx_lock(&malloc_mtx
);
864 mtip
= mtp
->ks_handle
;
865 mtp
->ks_handle
= NULL
;
866 if (mtp
!= kmemstatistics
) {
867 for (temp
= kmemstatistics
; temp
!= NULL
;
868 temp
= temp
->ks_next
) {
869 if (temp
->ks_next
== mtp
) {
870 temp
->ks_next
= mtp
->ks_next
;
875 ("malloc_uninit: type '%s' not found", mtp
->ks_shortdesc
));
877 kmemstatistics
= mtp
->ks_next
;
879 mtx_unlock(&malloc_mtx
);
882 * Look for memory leaks.
884 temp_allocs
= temp_bytes
= 0;
885 for (i
= 0; i
< MAXCPU
; i
++) {
886 mtsp
= &mtip
->mti_stats
[i
];
887 temp_allocs
+= mtsp
->mts_numallocs
;
888 temp_allocs
-= mtsp
->mts_numfrees
;
889 temp_bytes
+= mtsp
->mts_memalloced
;
890 temp_bytes
-= mtsp
->mts_memfreed
;
892 if (temp_allocs
> 0 || temp_bytes
> 0) {
893 printf("Warning: memory type %s leaked memory on destroy "
894 "(%ld allocations, %ld bytes leaked).\n", mtp
->ks_shortdesc
,
895 temp_allocs
, temp_bytes
);
898 slab
= vtoslab((vm_offset_t
) mtip
& (~UMA_SLAB_MASK
));
899 uma_zfree_arg(mt_zone
, mtip
, slab
);
903 malloc_desc2type(const char *desc
)
905 struct malloc_type
*mtp
;
907 mtx_assert(&malloc_mtx
, MA_OWNED
);
908 for (mtp
= kmemstatistics
; mtp
!= NULL
; mtp
= mtp
->ks_next
) {
909 if (strcmp(mtp
->ks_shortdesc
, desc
) == 0)
916 sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS
)
918 struct malloc_type_stream_header mtsh
;
919 struct malloc_type_internal
*mtip
;
920 struct malloc_type_header mth
;
921 struct malloc_type
*mtp
;
925 error
= sysctl_wire_old_buffer(req
, 0);
928 sbuf_new_for_sysctl(&sbuf
, NULL
, 128, req
);
929 sbuf_clear_flags(&sbuf
, SBUF_INCLUDENUL
);
930 mtx_lock(&malloc_mtx
);
933 * Insert stream header.
935 bzero(&mtsh
, sizeof(mtsh
));
936 mtsh
.mtsh_version
= MALLOC_TYPE_STREAM_VERSION
;
937 mtsh
.mtsh_maxcpus
= MAXCPU
;
938 mtsh
.mtsh_count
= kmemcount
;
939 (void)sbuf_bcat(&sbuf
, &mtsh
, sizeof(mtsh
));
942 * Insert alternating sequence of type headers and type statistics.
944 for (mtp
= kmemstatistics
; mtp
!= NULL
; mtp
= mtp
->ks_next
) {
945 mtip
= (struct malloc_type_internal
*)mtp
->ks_handle
;
948 * Insert type header.
950 bzero(&mth
, sizeof(mth
));
951 strlcpy(mth
.mth_name
, mtp
->ks_shortdesc
, MALLOC_MAX_NAME
);
952 (void)sbuf_bcat(&sbuf
, &mth
, sizeof(mth
));
955 * Insert type statistics for each CPU.
957 for (i
= 0; i
< MAXCPU
; i
++) {
958 (void)sbuf_bcat(&sbuf
, &mtip
->mti_stats
[i
],
959 sizeof(mtip
->mti_stats
[i
]));
962 mtx_unlock(&malloc_mtx
);
963 error
= sbuf_finish(&sbuf
);
968 SYSCTL_PROC(_kern
, OID_AUTO
, malloc_stats
, CTLFLAG_RD
|CTLTYPE_STRUCT
,
969 0, 0, sysctl_kern_malloc_stats
, "s,malloc_type_ustats",
970 "Return malloc types");
972 SYSCTL_INT(_kern
, OID_AUTO
, malloc_count
, CTLFLAG_RD
, &kmemcount
, 0,
973 "Count of kernel malloc types");
976 malloc_type_list(malloc_type_list_func_t
*func
, void *arg
)
978 struct malloc_type
*mtp
, **bufmtp
;
982 mtx_lock(&malloc_mtx
);
984 mtx_assert(&malloc_mtx
, MA_OWNED
);
986 mtx_unlock(&malloc_mtx
);
988 buflen
= sizeof(struct malloc_type
*) * count
;
989 bufmtp
= malloc(buflen
, M_TEMP
, M_WAITOK
);
991 mtx_lock(&malloc_mtx
);
993 if (count
< kmemcount
) {
994 free(bufmtp
, M_TEMP
);
998 for (mtp
= kmemstatistics
, i
= 0; mtp
!= NULL
; mtp
= mtp
->ks_next
, i
++)
1001 mtx_unlock(&malloc_mtx
);
1003 for (i
= 0; i
< count
; i
++)
1004 (func
)(bufmtp
[i
], arg
);
1006 free(bufmtp
, M_TEMP
);
1010 DB_SHOW_COMMAND(malloc
, db_show_malloc
)
1012 struct malloc_type_internal
*mtip
;
1013 struct malloc_type
*mtp
;
1014 uint64_t allocs
, frees
;
1015 uint64_t alloced
, freed
;
1018 db_printf("%18s %12s %12s %12s\n", "Type", "InUse", "MemUse",
1020 for (mtp
= kmemstatistics
; mtp
!= NULL
; mtp
= mtp
->ks_next
) {
1021 mtip
= (struct malloc_type_internal
*)mtp
->ks_handle
;
1026 for (i
= 0; i
< MAXCPU
; i
++) {
1027 allocs
+= mtip
->mti_stats
[i
].mts_numallocs
;
1028 frees
+= mtip
->mti_stats
[i
].mts_numfrees
;
1029 alloced
+= mtip
->mti_stats
[i
].mts_memalloced
;
1030 freed
+= mtip
->mti_stats
[i
].mts_memfreed
;
1032 db_printf("%18s %12ju %12juK %12ju\n",
1033 mtp
->ks_shortdesc
, allocs
- frees
,
1034 (alloced
- freed
+ 1023) / 1024, allocs
);
1040 #if MALLOC_DEBUG_MAXZONES > 1
1041 DB_SHOW_COMMAND(multizone_matches
, db_show_multizone_matches
)
1043 struct malloc_type_internal
*mtip
;
1044 struct malloc_type
*mtp
;
1048 db_printf("Usage: show multizone_matches <malloc type/addr>\n");
1052 if (mtp
->ks_magic
!= M_MAGIC
) {
1053 db_printf("Magic %lx does not match expected %x\n",
1054 mtp
->ks_magic
, M_MAGIC
);
1058 mtip
= mtp
->ks_handle
;
1059 subzone
= mtip
->mti_zone
;
1061 for (mtp
= kmemstatistics
; mtp
!= NULL
; mtp
= mtp
->ks_next
) {
1062 mtip
= mtp
->ks_handle
;
1063 if (mtip
->mti_zone
!= subzone
)
1065 db_printf("%s\n", mtp
->ks_shortdesc
);
1070 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
1073 #ifdef MALLOC_PROFILE
1076 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS
)
1090 error
= sysctl_wire_old_buffer(req
, 0);
1093 sbuf_new_for_sysctl(&sbuf
, NULL
, 128, req
);
1095 "\n Size Requests Real Size\n");
1096 for (i
= 0; i
< KMEM_ZSIZE
; i
++) {
1097 size
= i
<< KMEM_ZSHIFT
;
1098 rsize
= kmemzones
[kmemsize
[i
]].kz_size
;
1099 count
= (long long unsigned)krequests
[i
];
1101 sbuf_printf(&sbuf
, "%6d%28llu%11d\n", size
,
1102 (unsigned long long)count
, rsize
);
1104 if ((rsize
* count
) > (size
* count
))
1105 waste
+= (rsize
* count
) - (size
* count
);
1106 mem
+= (rsize
* count
);
1109 "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
1110 (unsigned long long)mem
, (unsigned long long)waste
);
1111 error
= sbuf_finish(&sbuf
);
1116 SYSCTL_OID(_kern
, OID_AUTO
, mprof
, CTLTYPE_STRING
|CTLFLAG_RD
,
1117 NULL
, 0, sysctl_kern_mprof
, "A", "Malloc Profiling");
1118 #endif /* MALLOC_PROFILE */