 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
 * Copyright (c) 2004-2006 Robert N. M. Watson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose
 * algorithms are well known.
 *
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* I should really use ktr.. */
/*
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/random.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/taskqueue.h>
#include <sys/vmmeter.h>

#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <vm/memguard.h>
/*
 * This is the zone and keg from which all zones are spawned.  The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
 */
static struct uma_keg masterkeg;
static struct uma_zone masterzone_k;
static struct uma_zone masterzone_z;
static uma_zone_t kegs = &masterzone_k;
static uma_zone_t zones = &masterzone_z;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/* The boot-time adjusted value for cache line alignment. */
int uma_align_cache = 64 - 1;

static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);

/* Linked list of all cache-only zones in the system */
static LIST_HEAD(,uma_zone) uma_cachezones =
    LIST_HEAD_INITIALIZER(uma_cachezones);

/* This RW lock protects the keg list */
static struct rwlock_padalign uma_rwlock;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(uma_boot_pages);

/* This mutex protects the boot time pages list */
static struct mtx_padalign uma_boot_pages_mtx;

static struct sx uma_drain_lock;

/* Is the VM done starting up? */
static int booted = 0;
#define	UMA_STARTUP	1
#define	UMA_STARTUP2	2

/*
 * This is the handle used to schedule events that need to happen
 * outside of the allocation fast path.
 */
static struct callout uma_callout;
#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	const char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	uma_import import;
	uma_release release;
	void *arg;
	uma_keg_t keg;
	int align;
	uint32_t flags;
};

struct uma_kctor_args {
	uma_zone_t zone;
	size_t size;
	uma_init uminit;
	uma_fini fini;
	int align;
	uint32_t flags;
};

struct uma_bucket_zone {
	uma_zone_t	ubz_zone;
	char		*ubz_name;
	int		ubz_entries;	/* Number of items it can hold. */
	int		ubz_maxsize;	/* Maximum allocation size per-item. */
};

/*
 * Compute the actual number of bucket entries to pack them in power
 * of two sizes for more efficient space utilization.
 */
#define	BUCKET_SIZE(n)						\
    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))

#define	BUCKET_MAX	BUCKET_SIZE(256)
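/*
 * Illustrative only (not part of the original file): assuming an LP64
 * kernel (8-byte pointers) and, hypothetically, a 16-byte struct
 * uma_bucket header, BUCKET_SIZE(16) = ((8 * 16) - 16) / 8 = 14, so a
 * "16 Bucket" actually stores 14 item pointers; the point is that the
 * whole bucket, header included, packs into a power-of-two allocation.
 */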
struct uma_bucket_zone bucket_zones[] = {
	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
	{ NULL, NULL, 0 }
};
/*
 * Flags and enumerations to be passed to internal functions.
 */
enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
static void page_free(void *, vm_size_t, uint8_t);
static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_drain(uma_zone_t zone);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
static void zone_dtor(void *, int, void *);
static int zero_init(void *, int, int);
static void keg_small_init(uma_keg_t keg);
static void keg_large_init(uma_keg_t keg);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last,
    int flags);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
    uma_fini fini, int align, uint32_t flags);
static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
static void zone_release(uma_zone_t zone, void **bucket, int cnt);
static void uma_zero_item(void *item, uma_zone_t zone);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);

static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);

SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");

SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");

static int zone_warnings = 1;
SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
    "Warn when UMA zones become full");
/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */
static void
bucket_enable(void)
{

	bucketdisable = vm_page_count_min();
}

/*
 * Initialize bucket_zones, the array of zones of buckets of various sizes.
 *
 * For each zone, calculate the memory required for each bucket, consisting
 * of the header and an array of pointers.
 */
static void
bucket_init(void)
{
	struct uma_bucket_zone *ubz;
	int size;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
		size += sizeof(void *) * ubz->ubz_entries;
		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
	}
}
/*
 * Given a desired number of entries for a bucket, return the zone from which
 * to allocate the bucket.
 */
static struct uma_bucket_zone *
bucket_zone_lookup(int entries)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		if (ubz->ubz_entries >= entries)
			return (ubz);
	ubz--;
	return (ubz);
}

static int
bucket_select(int size)
{
	struct uma_bucket_zone *ubz;

	ubz = &bucket_zones[0];
	if (size > ubz->ubz_maxsize)
		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);

	for (; ubz->ubz_entries != 0; ubz++)
		if (ubz->ubz_maxsize < size)
			break;
	ubz--;
	return (ubz->ubz_entries);
}
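/*
 * Illustrative only (not part of the original file): for a zone with
 * 700-byte items, the first row of bucket_zones[] whose ubz_maxsize
 * (512) is smaller than the item size is "32 Bucket", so the loop
 * backs up one row and returns the "16 Bucket" entry count.  Larger
 * items therefore get proportionally smaller per-CPU buckets.
 */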
static uma_bucket_t
bucket_alloc(uma_zone_t zone, void *udata, int flags)
{
	struct uma_bucket_zone *ubz;
	uma_bucket_t bucket;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
	 * boot pages.  This also prevents us from allocating buckets in
	 * low memory situations.
	 */
	if (bucketdisable)
		return (NULL);
	/*
	 * To limit bucket recursion we store the original zone flags
	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
	 * NOVM flag to persist even through deep recursions.  We also
	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
	 * a bucket for a bucket zone so we do not allow infinite bucket
	 * recursion.  This cookie will even persist to frees of unused
	 * buckets via the allocation path or bucket allocations in the
	 * free path.
	 */
	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
		udata = (void *)(uintptr_t)zone->uz_flags;
	else {
		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
			return (NULL);
		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
	}
	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
		flags |= M_NOVM;
	ubz = bucket_zone_lookup(zone->uz_count);
	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
		ubz++;
	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
	if (bucket) {
		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
		bucket->ub_cnt = 0;
		bucket->ub_entries = ubz->ubz_entries;
	}

	return (bucket);
}
static void
bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
{
	struct uma_bucket_zone *ubz;

	KASSERT(bucket->ub_cnt == 0,
	    ("bucket_free: Freeing a non free bucket."));
	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
		udata = (void *)(uintptr_t)zone->uz_flags;
	ubz = bucket_zone_lookup(bucket->ub_entries);
	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
}

static void
bucket_zone_drain(void)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		zone_drain(ubz->ubz_zone);
}
static void
zone_log_warning(uma_zone_t zone)
{
	static const struct timeval warninterval = { 300, 0 };

	if (!zone_warnings || zone->uz_warning == NULL)
		return;

	if (ratecheck(&zone->uz_ratecheck, &warninterval))
		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
}

static void
zone_maxaction(uma_zone_t zone)
{

	if (zone->uz_maxaction.ta_func != NULL)
		taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}

static void
zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
{
	uma_klink_t klink;

	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
		kegfn(klink->kl_keg);
}
/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (stats, hash size, etc.)
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This expands the
 * hashes and does per cpu statistics aggregation.
 */
static void
keg_timeout(uma_keg_t keg)
{

	KEG_LOCK(keg);
	/*
	 * Expand the keg hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely reduce collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the keg lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = keg->uk_hash;
		KEG_UNLOCK(keg);
		ret = hash_alloc(&newhash);
		KEG_LOCK(keg);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				oldhash = newhash;

			KEG_UNLOCK(keg);
			hash_free(&oldhash);
			return;
		}
	}
	KEG_UNLOCK(keg);
}

static void
zone_timeout(uma_zone_t zone)
{

	zone_foreach_keg(zone, &keg_timeout);
}
/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_UMAHASH, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}
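/*
 * Illustrative only (not part of the original file): keeping the table
 * size a power of two means the mask computed above turns a hash lookup
 * into a single AND, e.g. a 64-entry table has uh_hashmask = 0x3f and a
 * slab's data address reduces to one of chains 0..63.
 */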
/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */
	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	slab_hash  The hash bucket we're freeing
 *	hashsize   The number of entries in that hash bucket
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
	else
		free(hash->uh_slab_hash, M_UMAHASH);
}
/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone   The zone to free to, must be unlocked.
 *	bucket The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */
static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	int i;

	if (bucket == NULL)
		return;

	if (zone->uz_fini)
		for (i = 0; i < bucket->ub_cnt; i++)
			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
	bucket->ub_cnt = 0;
}

/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being torn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_cache_t cache;
	int cpu;

	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_drain() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
	CPU_FOREACH(cpu) {
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
		if (cache->uc_allocbucket != NULL)
			bucket_free(zone, cache->uc_allocbucket, NULL);
		if (cache->uc_freebucket != NULL)
			bucket_free(zone, cache->uc_freebucket, NULL);
		cache->uc_allocbucket = cache->uc_freebucket = NULL;
	}
	ZONE_LOCK(zone);
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
}
static void
cache_shrink(uma_zone_t zone)
{

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	ZONE_LOCK(zone);
	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
	ZONE_UNLOCK(zone);
}
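/*
 * Illustrative only (not part of the original file): the averaging above
 * walks uz_count halfway back toward its floor on every pass, e.g. with
 * uz_count_min = 14 and uz_count = 254 the count becomes
 * (14 + 254) / 2 = 134, then 74, 44, ... on subsequent calls.
 */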
static void
cache_drain_safe_cpu(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_bucket_t b1, b2;

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	b1 = b2 = NULL;
	ZONE_LOCK(zone);
	critical_enter();
	cache = &zone->uz_cpu[curcpu];
	if (cache->uc_allocbucket) {
		if (cache->uc_allocbucket->ub_cnt != 0)
			LIST_INSERT_HEAD(&zone->uz_buckets,
			    cache->uc_allocbucket, ub_link);
		else
			b1 = cache->uc_allocbucket;
		cache->uc_allocbucket = NULL;
	}
	if (cache->uc_freebucket) {
		if (cache->uc_freebucket->ub_cnt != 0)
			LIST_INSERT_HEAD(&zone->uz_buckets,
			    cache->uc_freebucket, ub_link);
		else
			b2 = cache->uc_freebucket;
		cache->uc_freebucket = NULL;
	}
	critical_exit();
	ZONE_UNLOCK(zone);
	if (b1)
		bucket_free(zone, b1, NULL);
	if (b2)
		bucket_free(zone, b2, NULL);
}

/*
 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
 * This is an expensive call because it needs to bind to all CPUs
 * one by one and enter a critical section on each of them in order
 * to safely access their cache buckets.
 * Zone lock must not be held when calling this function.
 */
static void
cache_drain_safe(uma_zone_t zone)
{
	int cpu;

	/*
	 * Polite bucket sizes shrinking was not enough, shrink aggressively.
	 */
	if (zone)
		cache_shrink(zone);
	else
		zone_foreach(cache_shrink);

	CPU_FOREACH(cpu) {
		thread_lock(curthread);
		sched_bind(curthread, cpu);
		thread_unlock(curthread);

		if (zone)
			cache_drain_safe_cpu(zone);
		else
			zone_foreach(cache_drain_safe_cpu);
	}
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
}
/*
 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 */
static void
bucket_cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc and free).
	 */
	while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		bucket_free(zone, bucket, NULL);
		ZONE_LOCK(zone);
	}

	/*
	 * Shrink further bucket sizes.  Price of single zone lock collision
	 * is probably lower than price of global cache drain.
	 */
	if (zone->uz_count > zone->uz_count_min)
		zone->uz_count--;
}
static void
keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
{
	uint8_t *mem;
	uint8_t flags;
	int i;

	mem = slab->us_data;
	flags = slab->us_flags;
	i = start;
	if (keg->uk_fini != NULL) {
		for (i--; i > -1; i--)
			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size);
	}
	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
#ifdef UMA_DEBUG
	printf("%s: Returning %d bytes.\n", keg->uk_name,
	    PAGE_SIZE * keg->uk_ppera);
#endif
	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
/*
 * Frees pages from a keg back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Returns nothing.
 */
static void
keg_drain(uma_keg_t keg)
{
	struct slabhead freeslabs = { 0 };
	uma_slab_t slab;
	uma_slab_t n;

	/*
	 * We don't want to take pages from statically allocated kegs at this
	 * time
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	KEG_LOCK(keg);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
#endif
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	KEG_UNLOCK(keg);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		keg_free_slab(keg, slab, keg->uk_ipers);
	}
}
static void
zone_drain_wait(uma_zone_t zone, int waitok)
{

	/*
	 * Set draining to interlock with zone_dtor() so we can release our
	 * locks as we go.  Only dtor() should do a WAITOK call since it
	 * is the only call that knows the structure will still be available
	 * when it wakes up.
	 */
	ZONE_LOCK(zone);
	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
		if (waitok == M_NOWAIT)
			goto out;
		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
	}
	zone->uz_flags |= UMA_ZFLAG_DRAINING;
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
	/*
	 * The DRAINING flag protects us from being freed while
	 * we're running.  Normally the uma_rwlock would protect us but we
	 * must be able to release and acquire the right lock for each keg.
	 */
	zone_foreach_keg(zone, &keg_drain);
	ZONE_LOCK(zone);
	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
	wakeup(zone);
out:
	ZONE_UNLOCK(zone);
}

void
zone_drain(uma_zone_t zone)
{

	zone_drain_wait(zone, M_NOWAIT);
}
/*
 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 */
static uma_slab_t
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
{
	uma_alloc allocf;
	uma_slab_t slab;
	uint8_t *mem;
	uint8_t flags;
	int i;

	mtx_assert(&keg->uk_lock, MA_OWNED);
	slab = NULL;
	mem = NULL;

#ifdef UMA_DEBUG
	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
#endif
	allocf = keg->uk_allocf;
	KEG_UNLOCK(keg);

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
		if (slab == NULL)
			goto out;
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */
	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (keg->uk_flags & UMA_ZONE_NODUMP)
		wait |= M_NODUMP;

	/* zone is passed for legacy reasons. */
	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
	if (mem == NULL) {
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
		slab = NULL;
		goto out;
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t)(mem + keg->uk_pgoff);

	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_flags = flags;
	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
#ifdef INVARIANTS
	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
#endif

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			keg_free_slab(keg, slab, i);
			slab = NULL;
			goto out;
		}
	}
out:
	KEG_LOCK(keg);

	if (slab != NULL) {
		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

		keg->uk_pages += keg->uk_ppera;
		keg->uk_free += keg->uk_ipers;
	}

	return (slab);
}
/*
 * This function is intended to be used early on in place of page_alloc() so
 * that we may use the boot time page cache to satisfy allocations before
 * the VM is ready.
 */
static void *
startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;
	int pages, check_pages;

	keg = zone_first_keg(zone);
	pages = howmany(bytes, PAGE_SIZE);
	check_pages = pages - 1;
	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);

	/* First check if we have enough room. */
	tmps = LIST_FIRST(&uma_boot_pages);
	while (tmps != NULL && check_pages-- > 0)
		tmps = LIST_NEXT(tmps, us_link);
	if (tmps != NULL) {
		/*
		 * It's ok to lose tmps references.  The last one will
		 * have tmps->us_data pointing to the start address of
		 * "pages" contiguous pages of memory.
		 */
		while (pages-- > 0) {
			tmps = LIST_FIRST(&uma_boot_pages);
			LIST_REMOVE(tmps, us_link);
		}
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	if (booted < UMA_STARTUP2)
		panic("UMA: Increase vm.boot_pages");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}
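/*
 * Illustrative only (not part of the original file): the boot cache
 * consumed above is sized by the vm.boot_pages tunable, which is why the
 * panic message suggests raising it when early allocations outgrow the
 * reserve before the allocator reaches UMA_STARTUP2.
 */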
/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_arena, bytes, wait);

	return (p);
}
/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
	TAILQ_HEAD(, vm_page) alloctail;
	u_long npages;
	vm_offset_t retkva, zkva;
	vm_page_t p, p_next;
	uma_keg_t keg;

	TAILQ_INIT(&alloctail);
	keg = zone_first_keg(zone);

	npages = howmany(bytes, PAGE_SIZE);
	while (npages > 0) {
		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
		if (p != NULL) {
			/*
			 * Since the page does not belong to an object, its
			 * listq is unused.
			 */
			TAILQ_INSERT_TAIL(&alloctail, p, listq);
			npages--;
			continue;
		}
		if (wait & M_WAITOK) {
			VM_WAIT;
			continue;
		}

		/*
		 * Page allocation failed, free intermediate pages and
		 * exit.
		 */
		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
			vm_page_unwire(p, PQ_NONE);
			vm_page_free(p);
		}
		return (NULL);
	}
	*flags = UMA_SLAB_PRIV;
	zkva = keg->uk_kva +
	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
	retkva = zkva;
	TAILQ_FOREACH(p, &alloctail, listq) {
		pmap_qenter(zkva, &p, 1);
		zkva += PAGE_SIZE;
	}

	return ((void *)retkva);
}
/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 */
static void
page_free(void *mem, vm_size_t size, uint8_t flags)
{
	struct vmem *vmem;

	if (flags & UMA_SLAB_KMEM)
		vmem = kmem_arena;
	else if (flags & UMA_SLAB_KERNEL)
		vmem = kernel_arena;
	else
		panic("UMA: page_free used with invalid flags %d", flags);

	kmem_free(vmem, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 */
static int
zero_init(void *mem, int size, int flags)
{
	bzero(mem, size);
	return (0);
}
/*
 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
 *
 * Arguments:
 *	keg  The zone we should initialize
 *
 * Returns:
 *	Nothing
 */
static void
keg_small_init(uma_keg_t keg)
{
	u_int rsize;
	u_int memused;
	u_int wastedspace;
	u_int shsize;

	if (keg->uk_flags & UMA_ZONE_PCPU) {
		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;

		keg->uk_slabsize = sizeof(struct pcpu);
		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
		    PAGE_SIZE);
	} else {
		keg->uk_slabsize = UMA_SLAB_SIZE;
		keg->uk_ppera = 1;
	}

	/*
	 * Calculate the size of each allocation (rsize) according to
	 * alignment.  If the requested size is smaller than we have
	 * allocation bits for we round it up.
	 */
	rsize = keg->uk_size;
	if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
		rsize = keg->uk_slabsize / SLAB_SETSIZE;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
	keg->uk_rsize = rsize;
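	/*
	 * Illustrative only (not part of the original file): with
	 * uk_align = 15 (a 16-byte alignment mask) and a requested size
	 * of 40 bytes, 40 & 15 is nonzero, so rsize becomes
	 * (40 & ~15) + 16 = 32 + 16 = 48, i.e. the item size is rounded
	 * up to the next alignment boundary.
	 */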
	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
	    keg->uk_rsize < sizeof(struct pcpu),
	    ("%s: size %u too large", __func__, keg->uk_rsize));

	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		shsize = 0;
	else
		shsize = sizeof(struct uma_slab);

	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));

	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = keg->uk_slabsize - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM for slabs which we do not
	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
	 * of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	/*
	 * See if using an OFFPAGE slab will limit our waste.  Only do
	 * this if it permits more items per-slab.
	 *
	 * XXX We could try growing slabsize to limit max waste as well.
	 * Historically this was not done because the VM could not
	 * efficiently handle contiguous allocations.
	 */
	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "keg: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", keg->uk_name, wastedspace,
		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
	}

	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;
}
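/*
 * Illustrative only (not part of the original file), under assumed
 * numbers: take a 4096-byte slab, 256-byte items, a hypothetical
 * 112-byte inline slab header, and UMA_MAX_WASTE = 10.  Then
 * ipers = (4096 - 112) / 256 = 15, memused = 15 * 256 + 112 = 3952 and
 * wastedspace = 144.  Since 144 < 4096 / 10 = 409, the waste test above
 * does not fire and the slab header stays on the page; only when the
 * waste exceeds that threshold and an off-page header would fit more
 * items does the keg switch to UMA_ZONE_OFFPAGE.
 */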
/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments:
 *	keg  The keg we should initialize
 *
 * Returns:
 *	Nothing
 */
static void
keg_large_init(uma_keg_t keg)
{
	u_int shsize;

	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));

	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
	keg->uk_ipers = 1;
	keg->uk_rsize = keg->uk_size;

	/* We can't do OFFPAGE if we're internal, bail out here. */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
		return;

	/* Check whether we have enough space to not do OFFPAGE. */
	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
		shsize = sizeof(struct uma_slab);
		if (shsize & UMA_ALIGN_PTR)
			shsize = (shsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);

		if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
			keg->uk_flags |= UMA_ZONE_OFFPAGE;
	}

	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;
}
static void
keg_cachespread_init(uma_keg_t keg)
{
	int alignsize;
	int trailer;
	int pages;
	int rsize;

	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));

	alignsize = keg->uk_align + 1;
	rsize = keg->uk_size;
	/*
	 * We want one item to start on every align boundary in a page.  To
	 * do this we will span pages.  We will also extend the item by the
	 * size of align if it is an even multiple of align.  Otherwise, it
	 * would fall on the same boundary every time.
	 */
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + alignsize;
	if ((rsize & alignsize) == 0)
		rsize += alignsize;
	trailer = rsize - keg->uk_size;
	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
	keg->uk_rsize = rsize;
	keg->uk_ppera = pages;
	keg->uk_slabsize = UMA_SLAB_SIZE;
	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
	    keg->uk_ipers));
}
1393 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1394 * the keg onto the global keg list.
1396 * Arguments/Returns follow uma_ctor specifications
1397 * udata Actually uma_kctor_args
1400 keg_ctor(void *mem
, int size
, void *udata
, int flags
)
1402 struct uma_kctor_args
*arg
= udata
;
1403 uma_keg_t keg
= mem
;
1407 keg
->uk_size
= arg
->size
;
1408 keg
->uk_init
= arg
->uminit
;
1409 keg
->uk_fini
= arg
->fini
;
1410 keg
->uk_align
= arg
->align
;
1412 keg
->uk_reserve
= 0;
1414 keg
->uk_flags
= arg
->flags
;
1415 keg
->uk_allocf
= page_alloc
;
1416 keg
->uk_freef
= page_free
;
1417 keg
->uk_slabzone
= NULL
;
1420 * The master zone is passed to us at keg-creation time.
1423 keg
->uk_name
= zone
->uz_name
;
1425 if (arg
->flags
& UMA_ZONE_VM
)
1426 keg
->uk_flags
|= UMA_ZFLAG_CACHEONLY
;
1428 if (arg
->flags
& UMA_ZONE_ZINIT
)
1429 keg
->uk_init
= zero_init
;
1431 if (arg
->flags
& UMA_ZONE_MALLOC
)
1432 keg
->uk_flags
|= UMA_ZONE_VTOSLAB
;
1434 if (arg
->flags
& UMA_ZONE_PCPU
)
1436 keg
->uk_flags
|= UMA_ZONE_OFFPAGE
;
1438 keg
->uk_flags
&= ~UMA_ZONE_PCPU
;
1441 if (keg
->uk_flags
& UMA_ZONE_CACHESPREAD
) {
1442 keg_cachespread_init(keg
);
1444 if (keg
->uk_size
> (UMA_SLAB_SIZE
- sizeof(struct uma_slab
)))
1445 keg_large_init(keg
);
1447 keg_small_init(keg
);
1450 if (keg
->uk_flags
& UMA_ZONE_OFFPAGE
)
1451 keg
->uk_slabzone
= slabzone
;
1454 * If we haven't booted yet we need allocations to go through the
1455 * startup cache until the vm is ready.
1457 if (keg
->uk_ppera
== 1) {
1458 #ifdef UMA_MD_SMALL_ALLOC
1459 keg
->uk_allocf
= uma_small_alloc
;
1460 keg
->uk_freef
= uma_small_free
;
1462 if (booted
< UMA_STARTUP
)
1463 keg
->uk_allocf
= startup_alloc
;
1465 if (booted
< UMA_STARTUP2
)
1466 keg
->uk_allocf
= startup_alloc
;
1468 } else if (booted
< UMA_STARTUP2
&&
1469 (keg
->uk_flags
& UMA_ZFLAG_INTERNAL
))
1470 keg
->uk_allocf
= startup_alloc
;
1473 * Initialize keg's lock
1475 KEG_LOCK_INIT(keg
, (arg
->flags
& UMA_ZONE_MTXCLASS
));
1478 * If we're putting the slab header in the actual page we need to
1479 * figure out where in each page it goes. This calculates a right
1480 * justified offset into the memory on an ALIGN_PTR boundary.
1482 if (!(keg
->uk_flags
& UMA_ZONE_OFFPAGE
)) {
1485 /* Size of the slab struct and free list */
1486 totsize
= sizeof(struct uma_slab
);
1488 if (totsize
& UMA_ALIGN_PTR
)
1489 totsize
= (totsize
& ~UMA_ALIGN_PTR
) +
1490 (UMA_ALIGN_PTR
+ 1);
1491 keg
->uk_pgoff
= (PAGE_SIZE
* keg
->uk_ppera
) - totsize
;
1494 * The only way the following is possible is if with our
1495 * UMA_ALIGN_PTR adjustments we are now bigger than
1496 * UMA_SLAB_SIZE. I haven't checked whether this is
1497 * mathematically possible for all cases, so we make
1500 totsize
= keg
->uk_pgoff
+ sizeof(struct uma_slab
);
1501 if (totsize
> PAGE_SIZE
* keg
->uk_ppera
) {
1502 printf("zone %s ipers %d rsize %d size %d\n",
1503 zone
->uz_name
, keg
->uk_ipers
, keg
->uk_rsize
,
1505 panic("UMA slab won't fit.");
1509 if (keg
->uk_flags
& UMA_ZONE_HASH
)
1510 hash_alloc(&keg
->uk_hash
);
1513 printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1514 zone
->uz_name
, zone
, keg
->uk_size
, keg
->uk_rsize
, keg
->uk_flags
,
1515 keg
->uk_ipers
, keg
->uk_ppera
,
1516 (keg
->uk_ipers
* keg
->uk_pages
) - keg
->uk_free
, keg
->uk_free
);
1519 LIST_INSERT_HEAD(&keg
->uk_zones
, zone
, uz_link
);
1521 rw_wlock(&uma_rwlock
);
1522 LIST_INSERT_HEAD(&uma_kegs
, keg
, uk_link
);
1523 rw_wunlock(&uma_rwlock
);
1528 * Zone header ctor. This initializes all fields, locks, etc.
1530 * Arguments/Returns follow uma_ctor specifications
1531 * udata Actually uma_zctor_args
1534 zone_ctor(void *mem
, int size
, void *udata
, int flags
)
1536 struct uma_zctor_args
*arg
= udata
;
1537 uma_zone_t zone
= mem
;
1542 zone
->uz_name
= arg
->name
;
1543 zone
->uz_ctor
= arg
->ctor
;
1544 zone
->uz_dtor
= arg
->dtor
;
1545 zone
->uz_slab
= zone_fetch_slab
;
1546 zone
->uz_init
= NULL
;
1547 zone
->uz_fini
= NULL
;
1548 zone
->uz_allocs
= 0;
1551 zone
->uz_sleeps
= 0;
1553 zone
->uz_count_min
= 0;
1555 zone
->uz_warning
= NULL
;
1556 timevalclear(&zone
->uz_ratecheck
);
1559 ZONE_LOCK_INIT(zone
, (arg
->flags
& UMA_ZONE_MTXCLASS
));
1562 * This is a pure cache zone, no kegs.
1565 if (arg
->flags
& UMA_ZONE_VM
)
1566 arg
->flags
|= UMA_ZFLAG_CACHEONLY
;
1567 zone
->uz_flags
= arg
->flags
;
1568 zone
->uz_size
= arg
->size
;
1569 zone
->uz_import
= arg
->import
;
1570 zone
->uz_release
= arg
->release
;
1571 zone
->uz_arg
= arg
->arg
;
1572 zone
->uz_lockptr
= &zone
->uz_lock
;
1573 rw_wlock(&uma_rwlock
);
1574 LIST_INSERT_HEAD(&uma_cachezones
, zone
, uz_link
);
1575 rw_wunlock(&uma_rwlock
);
1580 * Use the regular zone/keg/slab allocator.
1582 zone
->uz_import
= (uma_import
)zone_import
;
1583 zone
->uz_release
= (uma_release
)zone_release
;
1584 zone
->uz_arg
= zone
;
1586 if (arg
->flags
& UMA_ZONE_SECONDARY
) {
1587 KASSERT(arg
->keg
!= NULL
, ("Secondary zone on zero'd keg"));
1588 zone
->uz_init
= arg
->uminit
;
1589 zone
->uz_fini
= arg
->fini
;
1590 zone
->uz_lockptr
= &keg
->uk_lock
;
1591 zone
->uz_flags
|= UMA_ZONE_SECONDARY
;
1592 rw_wlock(&uma_rwlock
);
1594 LIST_FOREACH(z
, &keg
->uk_zones
, uz_link
) {
1595 if (LIST_NEXT(z
, uz_link
) == NULL
) {
1596 LIST_INSERT_AFTER(z
, zone
, uz_link
);
1601 rw_wunlock(&uma_rwlock
);
1602 } else if (keg
== NULL
) {
1603 if ((keg
= uma_kcreate(zone
, arg
->size
, arg
->uminit
, arg
->fini
,
1604 arg
->align
, arg
->flags
)) == NULL
)
1607 struct uma_kctor_args karg
;
1610 /* We should only be here from uma_startup() */
1611 karg
.size
= arg
->size
;
1612 karg
.uminit
= arg
->uminit
;
1613 karg
.fini
= arg
->fini
;
1614 karg
.align
= arg
->align
;
1615 karg
.flags
= arg
->flags
;
1617 error
= keg_ctor(arg
->keg
, sizeof(struct uma_keg
), &karg
,
1624 * Link in the first keg.
1626 zone
->uz_klink
.kl_keg
= keg
;
1627 LIST_INSERT_HEAD(&zone
->uz_kegs
, &zone
->uz_klink
, kl_link
);
1628 zone
->uz_lockptr
= &keg
->uk_lock
;
1629 zone
->uz_size
= keg
->uk_size
;
1630 zone
->uz_flags
|= (keg
->uk_flags
&
1631 (UMA_ZONE_INHERIT
| UMA_ZFLAG_INHERIT
));
1634 * Some internal zones don't have room allocated for the per cpu
1635 * caches. If we're internal, bail out here.
1637 if (keg
->uk_flags
& UMA_ZFLAG_INTERNAL
) {
1638 KASSERT((zone
->uz_flags
& UMA_ZONE_SECONDARY
) == 0,
1639 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1644 if ((arg
->flags
& UMA_ZONE_MAXBUCKET
) == 0)
1645 zone
->uz_count
= bucket_select(zone
->uz_size
);
1647 zone
->uz_count
= BUCKET_MAX
;
1648 zone
->uz_count_min
= zone
->uz_count
;
1654 * Keg header dtor. This frees all data, destroys locks, frees the hash
1655 * table and removes the keg from the global list.
1657 * Arguments/Returns follow uma_dtor specifications
1661 keg_dtor(void *arg
, int size
, void *udata
)
1665 keg
= (uma_keg_t
)arg
;
1667 if (keg
->uk_free
!= 0) {
1668 printf("Freed UMA keg (%s) was not empty (%d items). "
1669 " Lost %d pages of memory.\n",
1670 keg
->uk_name
? keg
->uk_name
: "",
1671 keg
->uk_free
, keg
->uk_pages
);
1675 hash_free(&keg
->uk_hash
);
1683 * Arguments/Returns follow uma_dtor specifications
1687 zone_dtor(void *arg
, int size
, void *udata
)
1693 zone
= (uma_zone_t
)arg
;
1694 keg
= zone_first_keg(zone
);
1696 if (!(zone
->uz_flags
& UMA_ZFLAG_INTERNAL
))
1699 rw_wlock(&uma_rwlock
);
1700 LIST_REMOVE(zone
, uz_link
);
1701 rw_wunlock(&uma_rwlock
);
1703 * XXX there are some races here where
1704 * the zone can be drained but zone lock
1705 * released and then refilled before we
1706 * remove it... we dont care for now
1708 zone_drain_wait(zone
, M_WAITOK
);
1710 * Unlink all of our kegs.
1712 while ((klink
= LIST_FIRST(&zone
->uz_kegs
)) != NULL
) {
1713 klink
->kl_keg
= NULL
;
1714 LIST_REMOVE(klink
, kl_link
);
1715 if (klink
== &zone
->uz_klink
)
1717 free(klink
, M_TEMP
);
1720 * We only destroy kegs from non secondary zones.
1722 if (keg
!= NULL
&& (zone
->uz_flags
& UMA_ZONE_SECONDARY
) == 0) {
1723 rw_wlock(&uma_rwlock
);
1724 LIST_REMOVE(keg
, uk_link
);
1725 rw_wunlock(&uma_rwlock
);
1726 zone_free_item(kegs
, keg
, NULL
, SKIP_NONE
);
1728 ZONE_LOCK_FINI(zone
);
1732 * Traverses every zone in the system and calls a callback
1735 * zfunc A pointer to a function which accepts a zone
1742 zone_foreach(void (*zfunc
)(uma_zone_t
))
1747 rw_rlock(&uma_rwlock
);
1748 LIST_FOREACH(keg
, &uma_kegs
, uk_link
) {
1749 LIST_FOREACH(zone
, &keg
->uk_zones
, uz_link
)
1752 rw_runlock(&uma_rwlock
);
1755 /* Public functions */
1758 uma_startup(void *bootmem
, int boot_pages
)
1760 struct uma_zctor_args args
;
1765 printf("Creating uma keg headers zone and keg.\n");
1767 rw_init(&uma_rwlock
, "UMA lock");
1769 /* "manually" create the initial zone */
1770 memset(&args
, 0, sizeof(args
));
1771 args
.name
= "UMA Kegs";
1772 args
.size
= sizeof(struct uma_keg
);
1773 args
.ctor
= keg_ctor
;
1774 args
.dtor
= keg_dtor
;
1775 args
.uminit
= zero_init
;
1777 args
.keg
= &masterkeg
;
1778 args
.align
= 32 - 1;
1779 args
.flags
= UMA_ZFLAG_INTERNAL
;
1780 /* The initial zone has no Per cpu queues so it's smaller */
1781 zone_ctor(kegs
, sizeof(struct uma_zone
), &args
, M_WAITOK
);
1784 printf("Filling boot free list.\n");
1786 for (i
= 0; i
< boot_pages
; i
++) {
1787 slab
= (uma_slab_t
)((uint8_t *)bootmem
+ (i
* UMA_SLAB_SIZE
));
1788 slab
->us_data
= (uint8_t *)slab
;
1789 slab
->us_flags
= UMA_SLAB_BOOT
;
1790 LIST_INSERT_HEAD(&uma_boot_pages
, slab
, us_link
);
1792 mtx_init(&uma_boot_pages_mtx
, "UMA boot pages", NULL
, MTX_DEF
);
1795 printf("Creating uma zone headers zone and keg.\n");
1797 args
.name
= "UMA Zones";
1798 args
.size
= sizeof(struct uma_zone
) +
1799 (sizeof(struct uma_cache
) * (mp_maxid
+ 1));
1800 args
.ctor
= zone_ctor
;
1801 args
.dtor
= zone_dtor
;
1802 args
.uminit
= zero_init
;
1805 args
.align
= 32 - 1;
1806 args
.flags
= UMA_ZFLAG_INTERNAL
;
1807 /* The initial zone has no Per cpu queues so it's smaller */
1808 zone_ctor(zones
, sizeof(struct uma_zone
), &args
, M_WAITOK
);
1811 printf("Creating slab and hash zones.\n");
1814 /* Now make a zone for slab headers */
1815 slabzone
= uma_zcreate("UMA Slabs",
1816 sizeof(struct uma_slab
),
1817 NULL
, NULL
, NULL
, NULL
,
1818 UMA_ALIGN_PTR
, UMA_ZFLAG_INTERNAL
);
1820 hashzone
= uma_zcreate("UMA Hash",
1821 sizeof(struct slabhead
*) * UMA_HASH_SIZE_INIT
,
1822 NULL
, NULL
, NULL
, NULL
,
1823 UMA_ALIGN_PTR
, UMA_ZFLAG_INTERNAL
);
1827 booted
= UMA_STARTUP
;
1830 printf("UMA startup complete.\n");
1838 booted
= UMA_STARTUP2
;
1840 sx_init(&uma_drain_lock
, "umadrain");
1842 printf("UMA startup2 complete.\n");
1847 * Initialize our callout handle
1855 printf("Starting callout.\n");
1857 callout_init(&uma_callout
, 1);
1858 callout_reset(&uma_callout
, UMA_TIMEOUT
* hz
, uma_timeout
, NULL
);
1860 printf("UMA startup3 complete.\n");
1865 uma_kcreate(uma_zone_t zone
, size_t size
, uma_init uminit
, uma_fini fini
,
1866 int align
, uint32_t flags
)
1868 struct uma_kctor_args args
;
1871 args
.uminit
= uminit
;
1873 args
.align
= (align
== UMA_ALIGN_CACHE
) ? uma_align_cache
: align
;
1876 return (zone_alloc_item(kegs
, &args
, M_WAITOK
));
1881 uma_set_align(int align
)
1884 if (align
!= UMA_ALIGN_CACHE
)
1885 uma_align_cache
= align
;
1890 uma_zcreate(const char *name
, size_t size
, uma_ctor ctor
, uma_dtor dtor
,
1891 uma_init uminit
, uma_fini fini
, int align
, uint32_t flags
)
1894 struct uma_zctor_args args
;
1898 /* This stuff is essential for the zone ctor */
1899 memset(&args
, 0, sizeof(args
));
1904 args
.uminit
= uminit
;
1908 * If a zone is being created with an empty constructor and
1909 * destructor, pass UMA constructor/destructor which checks for
1910 * memory use after free.
1912 if ((!(flags
& (UMA_ZONE_ZINIT
| UMA_ZONE_NOFREE
))) &&
1913 ctor
== NULL
&& dtor
== NULL
&& uminit
== NULL
&& fini
== NULL
) {
1914 args
.ctor
= trash_ctor
;
1915 args
.dtor
= trash_dtor
;
1916 args
.uminit
= trash_init
;
1917 args
.fini
= trash_fini
;
1924 if (booted
< UMA_STARTUP2
) {
1927 sx_slock(&uma_drain_lock
);
1930 res
= zone_alloc_item(zones
, &args
, M_WAITOK
);
1932 sx_sunlock(&uma_drain_lock
);
1938 uma_zsecond_create(char *name
, uma_ctor ctor
, uma_dtor dtor
,
1939 uma_init zinit
, uma_fini zfini
, uma_zone_t master
)
1941 struct uma_zctor_args args
;
1946 keg
= zone_first_keg(master
);
1947 memset(&args
, 0, sizeof(args
));
1949 args
.size
= keg
->uk_size
;
1952 args
.uminit
= zinit
;
1954 args
.align
= keg
->uk_align
;
1955 args
.flags
= keg
->uk_flags
| UMA_ZONE_SECONDARY
;
1958 if (booted
< UMA_STARTUP2
) {
1961 sx_slock(&uma_drain_lock
);
1964 /* XXX Attaches only one keg of potentially many. */
1965 res
= zone_alloc_item(zones
, &args
, M_WAITOK
);
1967 sx_sunlock(&uma_drain_lock
);
1973 uma_zcache_create(char *name
, int size
, uma_ctor ctor
, uma_dtor dtor
,
1974 uma_init zinit
, uma_fini zfini
, uma_import zimport
,
1975 uma_release zrelease
, void *arg
, int flags
)
1977 struct uma_zctor_args args
;
1979 memset(&args
, 0, sizeof(args
));
1984 args
.uminit
= zinit
;
1986 args
.import
= zimport
;
1987 args
.release
= zrelease
;
1992 return (zone_alloc_item(zones
, &args
, M_WAITOK
));
1996 zone_lock_pair(uma_zone_t a
, uma_zone_t b
)
2000 mtx_lock_flags(b
->uz_lockptr
, MTX_DUPOK
);
2003 mtx_lock_flags(a
->uz_lockptr
, MTX_DUPOK
);
2008 zone_unlock_pair(uma_zone_t a
, uma_zone_t b
)
2016 uma_zsecond_add(uma_zone_t zone
, uma_zone_t master
)
2023 klink
= malloc(sizeof(*klink
), M_TEMP
, M_WAITOK
| M_ZERO
);
2025 zone_lock_pair(zone
, master
);
2027 * zone must use vtoslab() to resolve objects and must already be
2030 if ((zone
->uz_flags
& (UMA_ZONE_VTOSLAB
| UMA_ZONE_SECONDARY
))
2031 != (UMA_ZONE_VTOSLAB
| UMA_ZONE_SECONDARY
)) {
2036 * The new master must also use vtoslab().
2038 if ((zone
->uz_flags
& UMA_ZONE_VTOSLAB
) != UMA_ZONE_VTOSLAB
) {
2044 * The underlying object must be the same size. rsize
2047 if (master
->uz_size
!= zone
->uz_size
) {
2052 * Put it at the end of the list.
2054 klink
->kl_keg
= zone_first_keg(master
);
2055 LIST_FOREACH(kl
, &zone
->uz_kegs
, kl_link
) {
2056 if (LIST_NEXT(kl
, kl_link
) == NULL
) {
2057 LIST_INSERT_AFTER(kl
, klink
, kl_link
);
2062 zone
->uz_flags
|= UMA_ZFLAG_MULTI
;
2063 zone
->uz_slab
= zone_fetch_slab_multi
;
2066 zone_unlock_pair(zone
, master
);
2068 free(klink
, M_TEMP
);
2076 uma_zdestroy(uma_zone_t zone
)
2079 sx_slock(&uma_drain_lock
);
2080 zone_free_item(zones
, zone
, NULL
, SKIP_NONE
);
2081 sx_sunlock(&uma_drain_lock
);
2086 uma_zalloc_arg(uma_zone_t zone
, void *udata
, int flags
)
2090 uma_bucket_t bucket
;
2094 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2095 random_harvest_fast_uma(&zone
, sizeof(zone
), 1, RANDOM_UMA
);
2097 /* This is the fast path allocation */
2098 #ifdef UMA_DEBUG_ALLOC_1
2099 printf("Allocating one item from %s(%p)\n", zone
->uz_name
, zone
);
2101 CTR3(KTR_UMA
, "uma_zalloc_arg thread %x zone %s flags %d", curthread
,
2102 zone
->uz_name
, flags
);
2104 if (flags
& M_WAITOK
) {
2105 WITNESS_WARN(WARN_GIANTOK
| WARN_SLEEPOK
, NULL
,
2106 "uma_zalloc_arg: zone \"%s\"", zone
->uz_name
);
2108 KASSERT(curthread
->td_critnest
== 0 || SCHEDULER_STOPPED(),
2109 ("uma_zalloc_arg: called with spinlock or critical section held"));
2111 #ifdef DEBUG_MEMGUARD
2112 if (memguard_cmp_zone(zone
)) {
2113 item
= memguard_alloc(zone
->uz_size
, flags
);
2115 if (zone
->uz_init
!= NULL
&&
2116 zone
->uz_init(item
, zone
->uz_size
, flags
) != 0)
2118 if (zone
->uz_ctor
!= NULL
&&
2119 zone
->uz_ctor(item
, zone
->uz_size
, udata
,
2121 zone
->uz_fini(item
, zone
->uz_size
);
2126 /* This is unfortunate but should not be fatal. */
2130 * If possible, allocate from the per-CPU cache. There are two
2131 * requirements for safe access to the per-CPU cache: (1) the thread
2132 * accessing the cache must not be preempted or yield during access,
2133 * and (2) the thread must not migrate CPUs without switching which
2134 * cache it accesses. We rely on a critical section to prevent
2135 * preemption and migration. We release the critical section in
2136 * order to acquire the zone mutex if we are unable to allocate from
2137 * the current cache; when we re-acquire the critical section, we
2138 * must detect and handle migration if it has occurred.
2142 cache
= &zone
->uz_cpu
[cpu
];
2145 bucket
= cache
->uc_allocbucket
;
2146 if (bucket
!= NULL
&& bucket
->ub_cnt
> 0) {
2148 item
= bucket
->ub_bucket
[bucket
->ub_cnt
];
2150 bucket
->ub_bucket
[bucket
->ub_cnt
] = NULL
;
2152 KASSERT(item
!= NULL
, ("uma_zalloc: Bucket pointer mangled."));
2155 if (zone
->uz_ctor
!= NULL
&&
2156 zone
->uz_ctor(item
, zone
->uz_size
, udata
, flags
) != 0) {
2157 atomic_add_long(&zone
->uz_fails
, 1);
2158 zone_free_item(zone
, item
, udata
, SKIP_DTOR
);
2162 uma_dbg_alloc(zone
, NULL
, item
);
2165 uma_zero_item(item
, zone
);
2170 * We have run out of items in our alloc bucket.
2171 * See if we can switch with our free bucket.
2173 bucket
= cache
->uc_freebucket
;
2174 if (bucket
!= NULL
&& bucket
->ub_cnt
> 0) {
2175 #ifdef UMA_DEBUG_ALLOC
2176 printf("uma_zalloc: Swapping empty with alloc.\n");
2178 cache
->uc_freebucket
= cache
->uc_allocbucket
;
2179 cache
->uc_allocbucket
= bucket
;
2184 * Discard any empty allocation bucket while we hold no locks.
2186 bucket
= cache
->uc_allocbucket
;
2187 cache
->uc_allocbucket
= NULL
;
2190 bucket_free(zone
, bucket
, udata
);
2192 /* Short-circuit for zones without buckets and low memory. */
2193 if (zone
->uz_count
== 0 || bucketdisable
)
2197 * Attempt to retrieve the item from the per-CPU cache has failed, so
2198 * we must go back to the zone. This requires the zone lock, so we
2199 * must drop the critical section, then re-acquire it when we go back
2200 * to the cache. Since the critical section is released, we may be
2201 * preempted or migrate. As such, make sure not to maintain any
2202 * thread-local state specific to the cache from prior to releasing
2203 * the critical section.
2206 if (ZONE_TRYLOCK(zone
) == 0) {
2207 /* Record contention to size the buckets. */
2213 cache
= &zone
->uz_cpu
[cpu
];
2216 * Since we have locked the zone we may as well send back our stats.
2218 atomic_add_long(&zone
->uz_allocs
, cache
->uc_allocs
);
2219 atomic_add_long(&zone
->uz_frees
, cache
->uc_frees
);
2220 cache
->uc_allocs
= 0;
2221 cache
->uc_frees
= 0;
2223 /* See if we lost the race to fill the cache. */
2224 if (cache
->uc_allocbucket
!= NULL
) {
2230 * Check the zone's cache of buckets.
2232 if ((bucket
= LIST_FIRST(&zone
->uz_buckets
)) != NULL
) {
2233 KASSERT(bucket
->ub_cnt
!= 0,
2234 ("uma_zalloc_arg: Returning an empty bucket."));
2236 LIST_REMOVE(bucket
, ub_link
);
2237 cache
->uc_allocbucket
= bucket
;
2241 /* We are no longer associated with this CPU. */
2245 * We bump the uz count when the cache size is insufficient to
2246 * handle the working set.
2248 if (lockfail
&& zone
->uz_count
< BUCKET_MAX
)
2253 * Now lets just fill a bucket and put it on the free list. If that
2254 * works we'll restart the allocation from the beginning and it
2255 * will use the just filled bucket.
2257 bucket
= zone_alloc_bucket(zone
, udata
, flags
);
2258 if (bucket
!= NULL
) {
2262 cache
= &zone
->uz_cpu
[cpu
];
2264 * See if we lost the race or were migrated. Cache the
2265 * initialized bucket to make this less likely or claim
2266 * the memory directly.
2268 if (cache
->uc_allocbucket
== NULL
)
2269 cache
->uc_allocbucket
= bucket
;
2271 LIST_INSERT_HEAD(&zone
->uz_buckets
, bucket
, ub_link
);
2277 * We may not be able to get a bucket so return an actual item.
2280 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2284 item
= zone_alloc_item(zone
, udata
, flags
);
2290 keg_fetch_slab(uma_keg_t keg
, uma_zone_t zone
, int flags
)
2295 mtx_assert(&keg
->uk_lock
, MA_OWNED
);
2298 if ((flags
& M_USE_RESERVE
) == 0)
2299 reserve
= keg
->uk_reserve
;
2303 * Find a slab with some space. Prefer slabs that are partially
2304 * used over those that are totally full. This helps to reduce
2307 if (keg
->uk_free
> reserve
) {
2308 if (!LIST_EMPTY(&keg
->uk_part_slab
)) {
2309 slab
= LIST_FIRST(&keg
->uk_part_slab
);
2311 slab
= LIST_FIRST(&keg
->uk_free_slab
);
2312 LIST_REMOVE(slab
, us_link
);
2313 LIST_INSERT_HEAD(&keg
->uk_part_slab
, slab
,
2316 MPASS(slab
->us_keg
== keg
);
2321 * M_NOVM means don't ask at all!
2326 if (keg
->uk_maxpages
&& keg
->uk_pages
>= keg
->uk_maxpages
) {
2327 keg
->uk_flags
|= UMA_ZFLAG_FULL
;
2329 * If this is not a multi-zone, set the FULL bit.
2330 * Otherwise slab_multi() takes care of it.
2332 if ((zone
->uz_flags
& UMA_ZFLAG_MULTI
) == 0) {
2333 zone
->uz_flags
|= UMA_ZFLAG_FULL
;
2334 zone_log_warning(zone
);
2335 zone_maxaction(zone
);
2337 if (flags
& M_NOWAIT
)
2340 msleep(keg
, &keg
->uk_lock
, PVM
, "keglimit", 0);
2343 slab
= keg_alloc_slab(keg
, zone
, flags
);
2345 * If we got a slab here it's safe to mark it partially used
2346 * and return. We assume that the caller is going to remove
2347 * at least one item.
2350 MPASS(slab
->us_keg
== keg
);
2351 LIST_INSERT_HEAD(&keg
->uk_part_slab
, slab
, us_link
);
2355 * We might not have been able to get a slab but another cpu
2356 * could have while we were unlocked. Check again before we
2365 zone_fetch_slab(uma_zone_t zone
, uma_keg_t keg
, int flags
)
2370 keg
= zone_first_keg(zone
);
2375 slab
= keg_fetch_slab(keg
, zone
, flags
);
2378 if (flags
& (M_NOWAIT
| M_NOVM
))
2386 * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
2387 * with the keg locked. On NULL no lock is held.
2389 * The last pointer is used to seed the search. It is not required.
2392 zone_fetch_slab_multi(uma_zone_t zone
, uma_keg_t last
, int rflags
)
2402 * Don't wait on the first pass. This will skip limit tests
2403 * as well. We don't want to block if we can find a provider
2406 flags
= (rflags
& ~M_WAITOK
) | M_NOWAIT
;
2408 * Use the last slab allocated as a hint for where to start
2412 slab
= keg_fetch_slab(last
, zone
, flags
);
2418 * Loop until we have a slab incase of transient failures
2419 * while M_WAITOK is specified. I'm not sure this is 100%
2420 * required but we've done it for so long now.
2426 * Search the available kegs for slabs. Be careful to hold the
2427 * correct lock while calling into the keg layer.
2429 LIST_FOREACH(klink
, &zone
->uz_kegs
, kl_link
) {
2430 keg
= klink
->kl_keg
;
2432 if ((keg
->uk_flags
& UMA_ZFLAG_FULL
) == 0) {
2433 slab
= keg_fetch_slab(keg
, zone
, flags
);
2437 if (keg
->uk_flags
& UMA_ZFLAG_FULL
)
2443 if (rflags
& (M_NOWAIT
| M_NOVM
))
2447 * All kegs are full. XXX We can't atomically check all kegs
2448 * and sleep so just sleep for a short period and retry.
2450 if (full
&& !empty
) {
2452 zone
->uz_flags
|= UMA_ZFLAG_FULL
;
2454 zone_log_warning(zone
);
2455 zone_maxaction(zone
);
2456 msleep(zone
, zone
->uz_lockptr
, PVM
,
2457 "zonelimit", hz
/100);
2458 zone
->uz_flags
&= ~UMA_ZFLAG_FULL
;
2467 slab_alloc_item(uma_keg_t keg
, uma_slab_t slab
)
2472 MPASS(keg
== slab
->us_keg
);
2473 mtx_assert(&keg
->uk_lock
, MA_OWNED
);
2475 freei
= BIT_FFS(SLAB_SETSIZE
, &slab
->us_free
) - 1;
2476 BIT_CLR(SLAB_SETSIZE
, freei
, &slab
->us_free
);
2477 item
= slab
->us_data
+ (keg
->uk_rsize
* freei
);
2478 slab
->us_freecount
--;
2481 /* Move this slab to the full list */
2482 if (slab
->us_freecount
== 0) {
2483 LIST_REMOVE(slab
, us_link
);
2484 LIST_INSERT_HEAD(&keg
->uk_full_slab
, slab
, us_link
);
static int
zone_import(uma_zone_t zone, void **bucket, int max, int flags)
{
	uma_slab_t slab;
	uma_keg_t keg;
	int i;

	slab = NULL;
	keg = NULL;
	/* Try to keep the buckets totally full */
	for (i = 0; i < max; ) {
		if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
			break;
		keg = slab->us_keg;
		while (slab->us_freecount && i < max) {
			bucket[i++] = slab_alloc_item(keg, slab);
			if (keg->uk_free <= keg->uk_reserve)
				break;
		}
		/* Don't grab more than one slab at a time. */
		flags &= ~M_WAITOK;
		flags |= M_NOWAIT;
	}
	if (slab != NULL)
		KEG_UNLOCK(keg);

	return (i);
}
static uma_bucket_t
zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
{
	uma_bucket_t bucket;
	int max;

	/* Don't wait for buckets, preserve caller's NOVM setting. */
	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
	if (bucket == NULL)
		return (NULL);

	max = MIN(bucket->ub_entries, zone->uz_count);
	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
	    max, flags);

	/*
	 * Initialize the memory if necessary.
	 */
	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
		int i;

		for (i = 0; i < bucket->ub_cnt; i++)
			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
			    flags) != 0)
				break;

		/*
		 * If we couldn't initialize the whole bucket, put the
		 * rest back onto the freelist.
		 */
		if (i != bucket->ub_cnt) {
			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
			    bucket->ub_cnt - i);
#ifdef INVARIANTS
			bzero(&bucket->ub_bucket[i],
			    sizeof(void *) * (bucket->ub_cnt - i));
#endif
			bucket->ub_cnt = i;
		}
	}

	if (bucket->ub_cnt == 0) {
		bucket_free(zone, bucket, udata);
		atomic_add_long(&zone->uz_fails, 1);
		return (NULL);
	}

	return (bucket);
}
/*
 * Allocates a single item from a zone.
 *
 * Arguments:
 *	zone   The zone to alloc for.
 *	udata  The data to be passed to the constructor.
 *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
 *
 * Returns:
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if successful
 */
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
{
	void *item;

	item = NULL;

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
		goto fail;
	atomic_add_long(&zone->uz_allocs, 1);

	/*
	 * We have to call both the zone's init (not the keg's init)
	 * and the zone's ctor.  This is because the item is going from
	 * a keg slab directly to the user, and the user is expecting it
	 * to be both zone-init'd as well as zone-ctor'd.
	 */
	if (zone->uz_init != NULL) {
		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
			zone_free_item(zone, item, udata, SKIP_FINI);
			goto fail;
		}
	}
	if (zone->uz_ctor != NULL) {
		if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
			zone_free_item(zone, item, udata, SKIP_DTOR);
			goto fail;
		}
	}
#ifdef INVARIANTS
	uma_dbg_alloc(zone, NULL, item);
#endif
	if (flags & M_ZERO)
		uma_zero_item(item, zone);

	return (item);

fail:
	atomic_add_long(&zone->uz_fails, 1);
	return (NULL);
}
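
/*
 * Consumer-side sketch (illustrative; "foo_zone" and struct foo are
 * hypothetical and not part of this file): the import path above is normally
 * reached through the public uma(9) interface, e.g.
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	struct foo *f = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *
 * A per-CPU cache miss in uma_zalloc_arg() eventually funnels into
 * zone_alloc_bucket() or, as a last resort, zone_alloc_item() above.
 */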
/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int lockfail;
	int cpu;

	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);

#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
	    zone->uz_name);

	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
	    ("uma_zfree_arg: called with spinlock or critical section held"));

	/* uma_zfree(..., NULL) does nothing, to match free(9). */
	if (item == NULL)
		return;
#ifdef DEBUG_MEMGUARD
	if (is_memguard_addr(item)) {
		if (zone->uz_dtor != NULL)
			zone->uz_dtor(item, zone->uz_size, udata);
		if (zone->uz_fini != NULL)
			zone->uz_fini(item, zone->uz_size);
		memguard_free(item);
		return;
	}
#endif
#ifdef INVARIANTS
	if (zone->uz_flags & UMA_ZONE_MALLOC)
		uma_dbg_free(zone, udata, item);
	else
		uma_dbg_free(zone, NULL, item);
#endif
	if (zone->uz_dtor != NULL)
		zone->uz_dtor(item, zone->uz_size, udata);

	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_item;

	/*
	 * If possible, free to the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
zfree_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zfree_start:
	/*
	 * Try to free into the allocbucket first to give LIFO ordering
	 * for cache-hot datastructures.  Spill over into the freebucket
	 * if necessary.  Alloc will swap them if one runs dry.
	 */
	bucket = cache->uc_allocbucket;
	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
		bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
		    ("uma_zfree: Freeing to non free bucket index."));
		bucket->ub_bucket[bucket->ub_cnt] = item;
		bucket->ub_cnt++;
		cache->uc_frees++;
		critical_exit();
		return;
	}

	/*
	 * We must go back the zone, which requires acquiring the zone lock,
	 * which in turn means we must release and re-acquire the critical
	 * section.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	if (zone->uz_count == 0 || bucketdisable)
		goto zfree_item;

	lockfail = 0;
	if (ZONE_TRYLOCK(zone) == 0) {
		/* Record contention to size the buckets. */
		ZONE_LOCK(zone);
		lockfail = 1;
	}
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

	/*
	 * Since we have locked the zone we may as well send back our stats.
	 */
	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
	atomic_add_long(&zone->uz_frees, cache->uc_frees);
	cache->uc_allocs = 0;
	cache->uc_frees = 0;

	bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		ZONE_UNLOCK(zone);
		goto zfree_start;
	}
	cache->uc_freebucket = NULL;
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_cnt is pointing to the last free item */
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
	}

	/*
	 * We bump the uz count when the cache size is insufficient to
	 * handle the working set.
	 */
	if (lockfail && zone->uz_count < BUCKET_MAX)
		zone->uz_count++;
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bucket = bucket_alloc(zone, udata, M_NOWAIT);
	if (bucket) {
		critical_enter();
		cpu = curcpu;
		cache = &zone->uz_cpu[cpu];
		if (cache->uc_freebucket == NULL) {
			cache->uc_freebucket = bucket;
			goto zfree_start;
		}
		/*
		 * We lost the race, start over.  We have to drop our
		 * critical section to free the bucket.
		 */
		critical_exit();
		bucket_free(zone, bucket, udata);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_item:
	zone_free_item(zone, item, udata, SKIP_DTOR);
}
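
/*
 * Consumer-side sketch (illustrative; "foo_zone" is the hypothetical zone
 * from the earlier example): items are returned through uma_zfree() or
 * uma_zfree_arg(), passing the same udata the destructor expects, e.g.
 *
 *	uma_zfree(foo_zone, f);			(udata defaults to NULL)
 *	uma_zfree_arg(foo_zone, f, udata);
 *
 * The common case is absorbed by the per-CPU bucket fast path above; only a
 * missing bucket, a disabled bucket layer, or a full zone falls through to
 * zone_free_item().
 */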
static void
slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
{
	uint8_t freei;

	mtx_assert(&keg->uk_lock, MA_OWNED);
	MPASS(keg == slab->us_keg);

	/* Do we need to remove from any lists? */
	if (slab->us_freecount+1 == keg->uk_ipers) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
	}

	/* Slab management. */
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
	slab->us_freecount++;

	/* Keg statistics. */
	keg->uk_free++;
}
static void
zone_release(uma_zone_t zone, void **bucket, int cnt)
{
	void *item;
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;
	int clearfull;
	int i;

	clearfull = 0;
	keg = zone_first_keg(zone);
	KEG_LOCK(keg);
	for (i = 0; i < cnt; i++) {
		item = bucket[i];
		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
			if (zone->uz_flags & UMA_ZONE_HASH) {
				slab = hash_sfind(&keg->uk_hash, mem);
			} else {
				mem += keg->uk_pgoff;
				slab = (uma_slab_t)mem;
			}
		} else {
			slab = vtoslab((vm_offset_t)item);
			if (slab->us_keg != keg) {
				KEG_UNLOCK(keg);
				keg = slab->us_keg;
				KEG_LOCK(keg);
			}
		}
		slab_free_item(keg, slab, item);
		if (keg->uk_flags & UMA_ZFLAG_FULL) {
			if (keg->uk_pages < keg->uk_maxpages) {
				keg->uk_flags &= ~UMA_ZFLAG_FULL;
				clearfull = 1;
			}

			/*
			 * We can handle one more allocation.  Since we're
			 * clearing ZFLAG_FULL, wake up all procs blocked
			 * on pages.  This should be uncommon, so keeping this
			 * simple for now (rather than adding count of blocked
			 * threads etc).
			 */
			wakeup(keg);
		}
	}
	KEG_UNLOCK(keg);
	if (clearfull) {
		ZONE_LOCK(zone);
		zone->uz_flags &= ~UMA_ZFLAG_FULL;
		wakeup(zone);
		ZONE_UNLOCK(zone);
	}
}
/*
 * Frees a single item to any zone.
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip dtors and finis
 */
static void
zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
{

#ifdef INVARIANTS
	if (skip == SKIP_NONE) {
		if (zone->uz_flags & UMA_ZONE_MALLOC)
			uma_dbg_free(zone, udata, item);
		else
			uma_dbg_free(zone, NULL, item);
	}
#endif
	if (skip < SKIP_DTOR && zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

	if (skip < SKIP_FINI && zone->uz_fini)
		zone->uz_fini(item, zone->uz_size);

	atomic_add_long(&zone->uz_frees, 1);
	zone->uz_release(zone->uz_arg, &item, 1);
}
/* See uma.h */
int
uma_zone_set_max(uma_zone_t zone, int nitems)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return (0);
	KEG_LOCK(keg);
	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
	if (keg->uk_maxpages * keg->uk_ipers < nitems)
		keg->uk_maxpages += keg->uk_ppera;
	nitems = keg->uk_maxpages * keg->uk_ipers;
	KEG_UNLOCK(keg);

	return (nitems);
}

/* See uma.h */
int
uma_zone_get_max(uma_zone_t zone)
{
	int nitems;
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return (0);
	KEG_LOCK(keg);
	nitems = keg->uk_maxpages * keg->uk_ipers;
	KEG_UNLOCK(keg);

	return (nitems);
}
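
/*
 * Usage sketch (illustrative; "foo_zone" is hypothetical): callers cap a
 * zone and can read the effective limit back, e.g.
 *
 *	int granted = uma_zone_set_max(foo_zone, 1000);
 *	KASSERT(granted == uma_zone_get_max(foo_zone), ("limit mismatch"));
 *
 * Because the limit is stored as whole pages of uk_ipers items, the value
 * returned may be rounded up from the requested nitems.
 */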
/* See uma.h */
void
uma_zone_set_warning(uma_zone_t zone, const char *warning)
{

	ZONE_LOCK(zone);
	zone->uz_warning = warning;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
{

	ZONE_LOCK(zone);
	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
	ZONE_UNLOCK(zone);
}
/* See uma.h */
int
uma_zone_get_cur(uma_zone_t zone)
{
	int64_t nitems;
	u_int i;

	ZONE_LOCK(zone);
	nitems = zone->uz_allocs - zone->uz_frees;
	CPU_FOREACH(i) {
		/*
		 * See the comment in sysctl_vm_zone_stats() regarding the
		 * safety of accessing the per-cpu caches.  With the zone lock
		 * held, it is safe, but can potentially result in stale data.
		 */
		nitems += zone->uz_cpu[i].uc_allocs -
		    zone->uz_cpu[i].uc_frees;
	}
	ZONE_UNLOCK(zone);

	return (nitems < 0 ? 0 : nitems);
}
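
/*
 * Usage sketch (illustrative; "foo_zone", foo_hiwat and foo_reclaim() are
 * hypothetical): uma_zone_get_cur() gives an instantaneous estimate of live
 * items, suitable for a watermark test or a read-only sysctl, e.g.
 *
 *	if (uma_zone_get_cur(foo_zone) > foo_hiwat)
 *		foo_reclaim();
 *
 * The per-CPU counters are read without synchronizing with their owning
 * CPUs, so the result should be treated as approximate.
 */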
/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
	KEG_LOCK(keg);
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_init on non-empty keg"));
	keg->uk_init = uminit;
	KEG_UNLOCK(keg);
}

/* See uma.h */
void
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
	KEG_LOCK(keg);
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_fini on non-empty keg"));
	keg->uk_fini = fini;
	KEG_UNLOCK(keg);
}

/* See uma.h */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{

	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zinit on non-empty keg"));
	zone->uz_init = zinit;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{

	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zfini on non-empty keg"));
	zone->uz_fini = zfini;
	ZONE_UNLOCK(zone);
}
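
/*
 * Usage sketch (illustrative; foo_init and foo_zone are hypothetical): a
 * keg-level init set through uma_zone_set_init() runs once per item when a
 * slab's items are first set up (and the matching fini when the slab is torn
 * down), not on every uma_zalloc()/uma_zfree(), e.g.
 *
 *	static int
 *	foo_init(void *mem, int size, int flags)
 *	{
 *		bzero(mem, size);
 *		return (0);
 *	}
 *
 *	uma_zone_set_init(foo_zone, foo_init);
 *
 * Zone-level uz_init/uz_fini, installed with uma_zone_set_zinit()/zfini(),
 * instead wrap each item as it moves between the keg and the zone's caches,
 * as seen in zone_alloc_item() and zone_free_item() above.
 */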
/* See uma.h */
/* XXX uk_freef is not actually used with the zone locked */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
	KEG_LOCK(keg);
	keg->uk_freef = freef;
	KEG_UNLOCK(keg);
}

/* See uma.h */
/* XXX uk_allocf is not actually used with the zone locked */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KEG_LOCK(keg);
	keg->uk_allocf = allocf;
	KEG_UNLOCK(keg);
}
/* See uma.h */
void
uma_zone_reserve(uma_zone_t zone, int items)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return;
	KEG_LOCK(keg);
	keg->uk_reserve = items;
	KEG_UNLOCK(keg);
}
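
/*
 * Usage sketch (illustrative; "foo_zone" is hypothetical): a consumer that
 * must make progress in low-memory paths can keep a small reserve and dip
 * into it with M_USE_RESERVE, e.g.
 *
 *	uma_zone_reserve(foo_zone, 8);
 *	uma_prealloc(foo_zone, 8);
 *	...
 *	f = uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE);
 *
 * Ordinary allocations stop short of the reserved items; zone_import() above
 * breaks out once uk_free drops to uk_reserve.
 */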
/* See uma.h */
int
uma_zone_reserve_kva(uma_zone_t zone, int count)
{
	uma_keg_t keg;
	vm_offset_t kva;
	u_int pages;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return (0);
	pages = count / keg->uk_ipers;

	if (pages * keg->uk_ipers < count)
		pages++;

#ifdef UMA_MD_SMALL_ALLOC
	if (keg->uk_ppera > 1) {
#else
	if (1) {
#endif
		kva = kva_alloc((vm_size_t)pages * UMA_SLAB_SIZE);
		if (kva == 0)
			return (0);
	} else
		kva = 0;
	KEG_LOCK(keg);
	keg->uk_kva = kva;
	keg->uk_offset = 0;
	keg->uk_maxpages = pages;
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
	keg->uk_allocf = noobj_alloc;
#endif
	keg->uk_flags |= UMA_ZONE_NOFREE;
	KEG_UNLOCK(keg);

	return (1);
}
/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return;
	KEG_LOCK(keg);
	slabs = items / keg->uk_ipers;
	if (slabs * keg->uk_ipers < items)
		slabs++;
	while (slabs > 0) {
		slab = keg_alloc_slab(keg, zone, M_WAITOK);
		if (slab == NULL)
			break;
		MPASS(slab->us_keg == keg);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
		slabs--;
	}
	KEG_UNLOCK(keg);
}
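
/*
 * Usage sketch (illustrative; "foo_zone" is hypothetical): uma_prealloc() is
 * typically called right after zone creation so that early M_NOWAIT
 * consumers find slabs already sitting on uk_free_slab, e.g.
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_prealloc(foo_zone, 256);
 */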
/* See uma.h */
static void
uma_reclaim_locked(bool kmem_danger)
{

#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	sx_assert(&uma_drain_lock, SA_XLOCKED);
	bucket_enable();
	zone_foreach(zone_drain);
	if (vm_page_count_min() || kmem_danger) {
		cache_drain_safe(NULL);
		zone_foreach(zone_drain);
	}
	/*
	 * Some slabs may have been freed but this zone will be visited early
	 * we visit again so that we can free pages that are empty once other
	 * zones are drained.  We have to do the same for buckets.
	 */
	zone_drain(slabzone);
	bucket_zone_drain();
}

void
uma_reclaim(void)
{

	sx_xlock(&uma_drain_lock);
	uma_reclaim_locked(false);
	sx_xunlock(&uma_drain_lock);
}

static int uma_reclaim_needed;

void
uma_reclaim_wakeup(void)
{

	uma_reclaim_needed = 1;
	wakeup(&uma_reclaim_needed);
}
void
uma_reclaim_worker(void *arg __unused)
{

	sx_xlock(&uma_drain_lock);
	for (;;) {
		sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
		    "umarcl", 0);
		if (uma_reclaim_needed) {
			uma_reclaim_needed = 0;
			uma_reclaim_locked(true);
		}
	}
}
/* See uma.h */
int
uma_zone_exhausted(uma_zone_t zone)
{
	int full;

	ZONE_LOCK(zone);
	full = (zone->uz_flags & UMA_ZFLAG_FULL);
	ZONE_UNLOCK(zone);
	return (full);
}

int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
	return (zone->uz_flags & UMA_ZFLAG_FULL);
}
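
/*
 * Usage sketch (illustrative; "foo_zone" is hypothetical): capped zones can
 * be polled for exhaustion before a non-sleeping allocation is attempted,
 * e.g.
 *
 *	if (uma_zone_exhausted(foo_zone))
 *		return (ENOBUFS);
 *	f = uma_zalloc(foo_zone, M_NOWAIT);
 */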
void *
uma_large_malloc(vm_size_t size, int wait)
{
	void *mem;
	uma_slab_t slab;
	uint8_t flags;

	slab = zone_alloc_item(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
	}

	return (mem);
}

void
uma_large_free(uma_slab_t slab)
{

	page_free(slab->us_data, slab->us_size, slab->us_flags);
	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
static void
uma_zero_item(void *item, uma_zone_t zone)
{
	int i;

	if (zone->uz_flags & UMA_ZONE_PCPU) {
		CPU_FOREACH(i)
			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
	} else
		bzero(item, zone->uz_size);
}
void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}

static void
slab_print(uma_slab_t slab)
{
	printf("slab: keg %p, data %p, freecount %d\n",
	    slab->us_keg, slab->us_data, slab->us_freecount);
}

static void
cache_print(uma_cache_t cache)
{
	printf("alloc: %p(%d), free: %p(%d)\n",
	    cache->uc_allocbucket,
	    cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
	    cache->uc_freebucket,
	    cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
}

static void
uma_print_keg(uma_keg_t keg)
{
	uma_slab_t slab;

	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
	    "out %d free %d limit %d\n",
	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
	    (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
	printf("Part slabs:\n");
	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
		slab_print(slab);
	printf("Free slabs:\n");
	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
		slab_print(slab);
	printf("Full slabs:\n");
	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
		slab_print(slab);
}

void
uma_print_zone(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_klink_t kl;
	int i;

	printf("zone: %s(%p) size %d flags %#x\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
		uma_print_keg(kl->kl_keg);
	CPU_FOREACH(i) {
		cache = &zone->uz_cpu[i];
		printf("CPU %d Cache:\n", i);
		cache_print(cache);
	}
}
#ifdef DDB
/*
 * Generate statistics across both the zone and its per-cpu cache's.  Return
 * desired statistics if the pointer is non-NULL for that statistic.
 *
 * Note: does not update the zone statistics, as it can't safely clear the
 * per-CPU cache statistic.
 *
 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 * safe from off-CPU; we should modify the caches to track this information
 * directly so that we don't have to.
 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
    uint64_t *freesp, uint64_t *sleepsp)
{
	uma_cache_t cache;
	uint64_t allocs, frees, sleeps;
	int cachefree, cpu;

	allocs = frees = sleeps = 0;
	cachefree = 0;
	CPU_FOREACH(cpu) {
		cache = &z->uz_cpu[cpu];
		if (cache->uc_allocbucket != NULL)
			cachefree += cache->uc_allocbucket->ub_cnt;
		if (cache->uc_freebucket != NULL)
			cachefree += cache->uc_freebucket->ub_cnt;
		allocs += cache->uc_allocs;
		frees += cache->uc_frees;
	}
	allocs += z->uz_allocs;
	frees += z->uz_frees;
	sleeps += z->uz_sleeps;
	if (cachefreep != NULL)
		*cachefreep = cachefree;
	if (allocsp != NULL)
		*allocsp = allocs;
	if (freesp != NULL)
		*freesp = frees;
	if (sleepsp != NULL)
		*sleepsp = sleeps;
}
#endif /* DDB */
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
	uma_keg_t kz;
	uma_zone_t z;
	int count;

	count = 0;
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link)
			count++;
	}
	rw_runlock(&uma_rwlock);
	return (sysctl_handle_int(oidp, &count, 0, req));
}
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
	struct uma_stream_header ush;
	struct uma_type_header uth;
	struct uma_percpu_stat ups;
	uma_bucket_t bucket;
	struct sbuf sbuf;
	uma_cache_t cache;
	uma_klink_t kl;
	uma_zone_t z;
	uma_keg_t kz;
	uma_keg_t k;
	int count, error, i;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);

	count = 0;
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link)
			count++;
	}

	/*
	 * Insert stream header.
	 */
	bzero(&ush, sizeof(ush));
	ush.ush_version = UMA_STREAM_VERSION;
	ush.ush_maxcpus = (mp_maxid + 1);
	ush.ush_count = count;
	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			bzero(&uth, sizeof(uth));
			ZONE_LOCK(z);
			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
			uth.uth_align = kz->uk_align;
			uth.uth_size = kz->uk_size;
			uth.uth_rsize = kz->uk_rsize;
			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
				k = kl->kl_keg;
				uth.uth_maxpages += k->uk_maxpages;
				uth.uth_pages += k->uk_pages;
				uth.uth_keg_free += k->uk_free;
				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
				    * k->uk_ipers;
			}

			/*
			 * A zone is secondary if it is not the first entry
			 * on the keg's zone list.
			 */
			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z))
				uth.uth_zone_flags = UTH_ZONE_SECONDARY;

			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
				uth.uth_zone_free += bucket->ub_cnt;
			uth.uth_allocs = z->uz_allocs;
			uth.uth_frees = z->uz_frees;
			uth.uth_fails = z->uz_fails;
			uth.uth_sleeps = z->uz_sleeps;
			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
			/*
			 * While it is not normally safe to access the cache
			 * bucket pointers while not on the CPU that owns the
			 * cache, we only allow the pointers to be exchanged
			 * without the zone lock held, not invalidated, so
			 * accept the possible race associated with bucket
			 * exchange during monitoring.
			 */
			for (i = 0; i < (mp_maxid + 1); i++) {
				bzero(&ups, sizeof(ups));
				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
					goto skip;
				if (CPU_ABSENT(i))
					goto skip;
				cache = &z->uz_cpu[i];
				if (cache->uc_allocbucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_allocbucket->ub_cnt;
				if (cache->uc_freebucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_freebucket->ub_cnt;
				ups.ups_allocs = cache->uc_allocs;
				ups.ups_frees = cache->uc_frees;
skip:
				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
			}
			ZONE_UNLOCK(z);
		}
	}
	rw_runlock(&uma_rwlock);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
int
sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
{
	uma_zone_t zone = *(uma_zone_t *)arg1;
	int error, max;

	max = uma_zone_get_max(zone);
	error = sysctl_handle_int(oidp, &max, 0, req);
	if (error || !req->newptr)
		return (error);

	uma_zone_set_max(zone, max);

	return (0);
}

int
sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
{
	uma_zone_t zone = *(uma_zone_t *)arg1;
	int cur;

	cur = uma_zone_get_cur(zone);
	return (sysctl_handle_int(oidp, &cur, 0, req));
}
#ifdef INVARIANTS
static uma_slab_t
uma_dbg_getslab(uma_zone_t zone, void *item)
{
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;

	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
		slab = vtoslab((vm_offset_t)mem);
	} else {
		/*
		 * It is safe to return the slab here even though the
		 * zone is unlocked because the item's allocation state
		 * essentially holds a reference.
		 */
		ZONE_LOCK(zone);
		keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
		if (keg->uk_flags & UMA_ZONE_HASH)
			slab = hash_sfind(&keg->uk_hash, mem);
		else
			slab = (uma_slab_t)(mem + keg->uk_pgoff);
		ZONE_UNLOCK(zone);
	}

	return (slab);
}
/*
 * Set up the slab's freei data such that uma_dbg_free can function.
 */
static void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);
	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
}
/*
 * Verifies freed addresses.  Checks for alignment, valid slab membership
 * and duplicate frees.
 */
static void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: Freed item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	if (freei >= keg->uk_ipers)
		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	if (((freei * keg->uk_rsize) + slab->us_data) != item)
		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
}
#endif /* INVARIANTS */
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
	uint64_t allocs, frees, sleeps;
	uma_bucket_t bucket;
	uma_keg_t kz;
	uma_zone_t z;
	int cachefree;

	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
	    "Free", "Requests", "Sleeps", "Bucket");
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
				allocs = z->uz_allocs;
				frees = z->uz_frees;
				sleeps = z->uz_sleeps;
				cachefree = 0;
			} else
				uma_zone_sumstat(z, &cachefree, &allocs,
				    &frees, &sleeps);
			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z)))
				cachefree += kz->uk_free;
			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
				cachefree += bucket->ub_cnt;
			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
			    z->uz_name, (uintmax_t)kz->uk_size,
			    (intmax_t)(allocs - frees), cachefree,
			    (uintmax_t)allocs, sleeps, z->uz_count);
		}
	}
}

DB_SHOW_COMMAND(umacache, db_show_umacache)
{
	uint64_t allocs, frees;
	uma_bucket_t bucket;
	uma_zone_t z;
	int cachefree;

	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
	    "Requests", "Bucket");
	LIST_FOREACH(z, &uma_cachezones, uz_link) {
		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
		LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
			cachefree += bucket->ub_cnt;
		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
		    z->uz_name, (uintmax_t)z->uz_size,
		    (intmax_t)(allocs - frees), cachefree,
		    (uintmax_t)allocs, z->uz_count);
	}
}
#endif /* DDB */
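
/*
 * Usage sketch (illustrative): from the in-kernel debugger the commands
 * defined above are invoked as
 *
 *	db> show uma
 *	db> show umacache
 *
 * printing one line per zone with its item size, used, free and cached
 * counts alongside the request and sleep statistics.
 */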