 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * 11 April '97.  Started multi-threading - markhe
 *	The global cache-chain is protected by the semaphore 'cache_chain_sem'.
 *	The sem is only needed when accessing/extending the cache-chain, which
 *	can never happen inside an interrupt (kmem_cache_create(),
 *	kmem_cache_shrink() and kmem_cache_reap()).
 *	This is a medium-term exclusion lock.
 *
 *	Each cache has its own lock; 'c_spinlock'.  This lock is needed only
 *	when accessing non-constant members of a cache-struct.
 *	Note: 'constant members' are assigned a value in kmem_cache_create() before
 *	the cache is linked into the cache-chain.  The values never change, so not
 *	even a multi-reader lock is needed for these members.
 *	The c_spinlock is only ever held for a few cycles.
 *
 *	To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
 *	may be sleeping and therefore not holding the semaphore/lock), the
 *	c_growing field is used.  This also prevents reaping from a cache.
 *
 *	Note, caches can _never_ be destroyed.  When a sub-system (eg module) has
 *	finished with a cache, it can only be shrunk.  This leaves the cache empty,
 *	but already enabled for re-use, eg. during a module re-load.
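 *
 *	For illustration only (this example is not part of the original text;
 *	the names 'foo', foo_cachep and foo_ctor are hypothetical), a typical
 *	module would therefore create its cache once and only shrink it on
 *	unload:
 *
 *		static kmem_cache_t *foo_cachep;
 *
 *		int init_module(void)
 *		{
 *			if (!foo_cachep)
 *				foo_cachep = kmem_cache_create("foo",
 *						sizeof(struct foo), 0,
 *						SLAB_HWCACHE_ALIGN, foo_ctor, NULL);
 *			return foo_cachep ? 0 : -ENOMEM;
 *		}
 *
 *		void cleanup_module(void)
 *		{
 *			kmem_cache_shrink(foo_cachep);
 *		}
 *
 *	The cache survives the unload; keeping the pointer (hence the
 *	'if (!foo_cachep)' guard) lets a re-loaded module re-use it.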
 *
 * Notes:
 *  o Constructors/destructors are called while the cache-lock
 *    is _not_ held.  Therefore they _must_ be threaded.
 *  o Constructors must not attempt to allocate memory from the
 *    same cache that they are a constructor for - infinite loop!
 *    (There is no easy way to trap this.)
 *  o The per-cache locks must be obtained with local-interrupts disabled.
 *  o When compiled with debug support, and an object-verify (upon release)
 *    is requested for a cache, the verify-function is called with the cache
 *    lock held.  This helps debugging.
 *  o The functions called from try_to_free_page() must not attempt
 *    to allocate memory from a cache which is being grown.
 *    The buffer sub-system might try to allocate memory, via buffer_cachep.
 *    As this priority is passed to the SLAB, and then (if necessary) onto the
 *    gfp() funcs (which avoid calling try_to_free_page()), no deadlock
 *    should happen.
 * The positioning of the per-cache lock is tricky.  If the lock is
 * placed on the same h/w cache line as commonly accessed members
 * the number of L1 cache-line faults is reduced.  However, this can
 * lead to the cache-line ping-ponging between processors when the
 * lock is in contention (and the common members are being accessed).
 * Decided to keep it away from common members.
 *
 * More fine-graining is possible, with per-slab locks...but this might be
 * taking fine-graining too far.  It would have the advantage that
 * during most allocs/frees no writes occur to the cache-struct.
 * Therefore a multi-reader/one-writer lock could be used (the writer
 * needed when the slab chain is being linked/unlinked).
 * As we would not have an exclusion lock for the cache-structure, one
 * would be needed per-slab (for updating s_free ptr, and/or the contents
 * of a slab).
 * The above locking would allow parallel operations to different slabs within
 * the same cache with reduced spinning.
 * Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
 * would allow most allocations from the same cache to execute in parallel.
 *
 * At present, each engine can be growing a cache.  This should be blocked.
 *
 * It is not currently 100% safe to examine the page_struct outside of a kernel
 * or global cli lock.  The risk is v. small, and non-fatal.
 *
 * Calls to printk() are not 100% safe (the function is not threaded).  However,
 * printk() is only used under an error condition, and the risk is v. small (not
 * sure if the console write functions 'enjoy' executing multiple contexts in
 * parallel.  I guess they don't...).
 * Note, for most calls to printk() any held cache-lock is dropped.  This is not
 * always done for text size reasons - having *_unlock() everywhere is bloat.
 * An implementation of the Slab Allocator as described in outline in;
 *	UNIX Internals: The New Frontiers by Uresh Vahalia
 *	Pub: Prentice Hall	ISBN 0-13-101908-2
 * or with a little more detail in;
 *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *	Jeff Bonwick (Sun Microsystems).
 *	Presented at: USENIX Summer 1994 Technical Conference
 * This implementation deviates from Bonwick's paper as it
 * does not use a hash-table for large objects, but rather a per-slab
 * index to hold the bufctls.  This allows the bufctl structure to
 * be small (one word), but limits the number of objects a slab (not
 * a cache) can contain when off-slab bufctls are used.  The limit is the
 * size of the largest general cache that does not use off-slab bufctls,
 * divided by the size of a bufctl.  For 32bit archs, this is 256/4 = 64.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 */
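
/* A worked example of that limit (illustrative, not from the original text):
 * with 4K pages the largest general cache still using on-slab bufctls is the
 * 256 byte cache (anything >= PAGE_SIZE>>3 = 512 gets off-slab management),
 * and a bufctl is one word, so on a 32bit arch an off-slab bufctl index can
 * describe at most
 *
 *	256 / sizeof(kmem_bufctl_t) = 256 / 4 = 64
 *
 * objects per slab.  A 64bit arch with the same general caches would get
 * 256 / 8 = 32.
 */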
#include	<linux/config.h>
#include	<linux/slab.h>
#include	<linux/interrupt.h>
#include	<linux/init.h>

/* If there is a different PAGE_SIZE around, and it works with this allocator,
 * then change the following.
 */
#if	(PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
#error	Your page size is probably not correctly supported - please check
#endif
/* SLAB_MGMT_CHECKS	- 1 to enable extra checks in kmem_cache_create().
 *			  0 if you wish to reduce memory usage.
 *
 * SLAB_DEBUG_SUPPORT	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
 *			  SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISON.
 *			  0 for faster, smaller, code (especially in the critical paths).
 *
 * SLAB_STATS		- 1 to collect stats for /proc/slabinfo.
 *			  0 for faster, smaller, code (especially in the critical paths).
 *
 * SLAB_SELFTEST	- 1 to perform a few tests, mainly for development.
 */
#define	SLAB_MGMT_CHECKS	1
#define	SLAB_DEBUG_SUPPORT	0
#define	SLAB_STATS		0
#define	SLAB_SELFTEST		0
/* Shouldn't this be in a header file somewhere? */
#define	BYTES_PER_WORD		sizeof(void *)
/* Legal flag mask for kmem_cache_create(). */
#if	SLAB_DEBUG_SUPPORT
#if	0
#define	SLAB_C_MASK		(SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
				 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
				 SLAB_HIGH_PACK)
#endif
#define	SLAB_C_MASK		(SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
				 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
#else
#if	0
#define	SLAB_C_MASK		(SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
#endif
#define	SLAB_C_MASK		(SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
#endif	/* SLAB_DEBUG_SUPPORT */
/* Slab management struct.
 * Manages the objs in a slab.  Placed either at the end of mem allocated
 * for a slab, or from an internal obj cache (cache_slabp).
 * Slabs are chained into a partially ordered list; fully used first, partial
 * next, and then fully free slabs.
 * The first 4 members are referenced during an alloc/free operation, and
 * should always appear on the same cache line.
 * Note: The offset between some members _must_ match offsets within
 * the kmem_cache_t - see kmem_cache_init() for the checks. */
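
/* Illustration only (not from the original source): the partial ordering kept
 * by kmem_slab_link_end()/kmem_slab_link_free() and the *_full_free()/
 * *_one_free() helpers below means a cache's chain looks like
 *
 *	c_firstp -> [full] -> [full] -> [partial] -> [free] -> [free] -> c_lastp
 *	                                    ^
 *	                                 c_freep  (first slab with a free obj)
 *
 * so __kmem_cache_alloc() only ever looks at c_freep, while fully free slabs
 * collect at the tail where kmem_cache_shrink()/kmem_cache_reap() take them.
 */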

#define	SLAB_OFFSET_BITS	16	/* could make this larger for 64bit archs */
typedef struct kmem_slab_s {
	struct kmem_bufctl_s	*s_freep;	/* ptr to first inactive obj in slab */
	struct kmem_bufctl_s	*s_index;
	unsigned long		 s_magic;
	unsigned long		 s_inuse;	/* num of objs active in slab */

	struct kmem_slab_s	*s_nextp;
	struct kmem_slab_s	*s_prevp;
	void			*s_mem;		/* addr of first obj in slab */
	unsigned long		 s_offset:SLAB_OFFSET_BITS,
				 s_dma:1;
} kmem_slab_t;
/* When the slab management is on-slab, this gives the size to use. */
#define	slab_align_size		(L1_CACHE_ALIGN(sizeof(kmem_slab_t)))

/* Test for end of slab chain. */
#define	kmem_slab_end(x)	((kmem_slab_t*)&((x)->c_offset))

#define	SLAB_MAGIC_ALLOC	0xA5C32F2BUL	/* slab is alive */
#define	SLAB_MAGIC_DESTROYED	0xB2F23C5AUL	/* slab has been destroyed */
/* Bufctl's are used for linking objs within a slab, identifying what slab an obj
 * is in, and the address of the associated obj (for sanity checking with off-slab
 * bufctls).  What a bufctl contains depends upon the state of the obj and
 * the organisation of the cache.
 */
typedef struct kmem_bufctl_s {
	union {
		struct kmem_bufctl_s	*buf_nextp;
		kmem_slab_t		*buf_slabp;	/* slab for obj */
		void			*buf_objp;
	} u;
} kmem_bufctl_t;
/* ...shorthand... */
#define	buf_nextp	u.buf_nextp
#define	buf_slabp	u.buf_slabp
#define	buf_objp	u.buf_objp
#if	SLAB_DEBUG_SUPPORT
/* Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 */
#define	SLAB_RED_MAGIC1		0x5A2CF071UL	/* when obj is active */
#define	SLAB_RED_MAGIC2		0x170FC2A5UL	/* when obj is inactive */

/* ...and for poisoning */
#define	SLAB_POISON_BYTE	0x5a	/* byte value for poisoning */
#define	SLAB_POISON_END		0xa5	/* end-byte of poisoning */

#endif	/* SLAB_DEBUG_SUPPORT */
/* Cache struct - manages a cache.
 * First four members are commonly referenced during an alloc/free operation.
 */
struct kmem_cache_s {
	kmem_slab_t		 *c_freep;	/* first slab w. free objs */
	unsigned long		  c_flags;	/* constant flags */
	unsigned long		  c_offset;
	unsigned long		  c_num;	/* # of objs per slab */

	unsigned long		  c_magic;
	unsigned long		  c_inuse;	/* kept at zero */
	kmem_slab_t		 *c_firstp;	/* first slab in chain */
	kmem_slab_t		 *c_lastp;	/* last slab in chain */

	spinlock_t		  c_spinlock;
	unsigned long		  c_growing;
	unsigned long		  c_dflags;	/* dynamic flags */
	size_t			  c_org_size;
	unsigned long		  c_gfporder;	/* order of pgs per slab (2^n) */
	void (*c_ctor)(void *, kmem_cache_t *, unsigned long);	/* constructor func */
	void (*c_dtor)(void *, kmem_cache_t *, unsigned long);	/* de-constructor func */
	unsigned long		  c_align;	/* alignment of objs */
	size_t			  c_colour;	/* cache colouring range */
	size_t			  c_colour_next;/* cache colouring */
	unsigned long		  c_failures;
	const char		 *c_name;
	struct kmem_cache_s	 *c_nextp;
	kmem_cache_t		 *c_index_cachep;
#if	SLAB_STATS
	unsigned long		  c_num_active;
	unsigned long		  c_num_allocations;
	unsigned long		  c_high_mark;
	unsigned long		  c_grown;
	unsigned long		  c_reaped;
	atomic_t		  c_errors;
#endif	/* SLAB_STATS */
};
/* internal c_flags */
#define	SLAB_CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	SLAB_CFLGS_BUFCTL	0x020000UL	/* bufctls in own cache */
#define	SLAB_CFLGS_GENERAL	0x080000UL	/* a general cache */

/* c_dflags (dynamic flags).  Need to hold the spinlock to access this member */
#define	SLAB_CFLGS_GROWN	0x000002UL	/* don't reap a recently grown */

#define	SLAB_OFF_SLAB(x)	((x) & SLAB_CFLGS_OFF_SLAB)
#define	SLAB_BUFCTL(x)		((x) & SLAB_CFLGS_BUFCTL)
#define	SLAB_GROWN(x)		((x) & SLAB_CFLGS_GROWN)
#if	SLAB_STATS
#define	SLAB_STATS_INC_ACTIVE(x)	((x)->c_num_active++)
#define	SLAB_STATS_DEC_ACTIVE(x)	((x)->c_num_active--)
#define	SLAB_STATS_INC_ALLOCED(x)	((x)->c_num_allocations++)
#define	SLAB_STATS_INC_GROWN(x)		((x)->c_grown++)
#define	SLAB_STATS_INC_REAPED(x)	((x)->c_reaped++)
#define	SLAB_STATS_SET_HIGH(x)		do { if ((x)->c_num_active > (x)->c_high_mark) \
						(x)->c_high_mark = (x)->c_num_active; \
					} while (0)
#define	SLAB_STATS_INC_ERR(x)		(atomic_inc(&(x)->c_errors))
#else
#define	SLAB_STATS_INC_ACTIVE(x)
#define	SLAB_STATS_DEC_ACTIVE(x)
#define	SLAB_STATS_INC_ALLOCED(x)
#define	SLAB_STATS_INC_GROWN(x)
#define	SLAB_STATS_INC_REAPED(x)
#define	SLAB_STATS_SET_HIGH(x)
#define	SLAB_STATS_INC_ERR(x)
#endif	/* SLAB_STATS */
#if	SLAB_SELFTEST
#if	!SLAB_DEBUG_SUPPORT
#error	Debug support needed for self-test
#endif
static void kmem_self_test(void);
#endif	/* SLAB_SELFTEST */
/* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
#define	SLAB_C_MAGIC		0x4F17A36DUL

/* maximum size of an obj (in 2^order pages) */
#define	SLAB_OBJ_MAX_ORDER	5	/* 32 pages */

/* maximum num of pages for a slab (prevents large requests to the VM layer) */
#define	SLAB_MAX_GFP_ORDER	5	/* 32 pages */

/* the 'preferred' minimum num of objs per slab - maybe less for large objs */
#define	SLAB_MIN_OBJS_PER_SLAB	4

/* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
 * then the page order must be less than this before trying the next order.
 */
#define	SLAB_BREAK_GFP_ORDER_HI	2
#define	SLAB_BREAK_GFP_ORDER_LO	1
static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
/* Macros for storing/retrieving the cachep and or slab from the
 * global 'mem_map'.  With off-slab bufctls, these are used to find the
 * slab an obj belongs to.  With kmalloc(), and kfree(), these are used
 * to find the cache which an obj belongs to.
 */
#define	SLAB_SET_PAGE_CACHE(pg, x)	((pg)->next = (struct page *)(x))
#define	SLAB_GET_PAGE_CACHE(pg)		((kmem_cache_t *)(pg)->next)
#define	SLAB_SET_PAGE_SLAB(pg, x)	((pg)->prev = (struct page *)(x))
#define	SLAB_GET_PAGE_SLAB(pg)		((kmem_slab_t *)(pg)->prev)
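
/* Illustrative sketch (not part of the original file): how a free path gets
 * from an obj address back to its cache and slab via these macros.  The page
 * was stamped in kmem_cache_grow(); 'objp' is any address inside a slab's
 * pages, and the example_* names are hypothetical:
 *
 *	static inline kmem_cache_t *example_cache_of(const void *objp)
 *	{
 *		struct page *page = &mem_map[MAP_NR(objp)];
 *		return SLAB_GET_PAGE_CACHE(page);
 *	}
 *
 *	static inline kmem_slab_t *example_slab_of(const void *objp)
 *	{
 *		struct page *page = &mem_map[MAP_NR(objp)];
 *		return SLAB_GET_PAGE_SLAB(page);
 *	}
 *
 * Overloading page->next/page->prev like this is safe only because a page
 * owned by the slab allocator is never on any other list.
 */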
/* Size description struct for general caches. */
typedef struct cache_sizes {
	size_t		 cs_size;
	kmem_cache_t	*cs_cachep;
} cache_sizes_t;

static cache_sizes_t cache_sizes[] = {
#if	PAGE_SIZE == 4096
/* Names for the general caches.  Not placed into the sizes struct for
 * a good reason; the string ptr is not needed while searching in kmalloc(),
 * and would 'get-in-the-way' in the h/w cache.
 */
static char *cache_sizes_name[] = {
#if	PAGE_SIZE == 4096
/* internal cache of cache description objs */
static	kmem_cache_t	cache_cache = {
/* freep, flags */		kmem_slab_end(&cache_cache), SLAB_NO_REAP,
/* offset, num */		sizeof(kmem_cache_t),	0,
/* c_magic, c_inuse */		SLAB_C_MAGIC, 0,
/* firstp, lastp */		kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
/* spinlock */			SPIN_LOCK_UNLOCKED,
/* growing, dflags */		0, 0,
/* org_size, gfp */		0, 0,
/* ctor, dtor, align */		NULL, NULL, L1_CACHE_BYTES,
/* colour, colour_next */	0, 0,
/* failures */			0,
/* name */			"kmem_cache",
/* nextp */			&cache_cache,
/* index */			NULL,
};
/* Guard access to the cache-chain. */
static struct semaphore	cache_chain_sem;

/* Place maintainer for reaping. */
static	kmem_cache_t	*clock_searchp = &cache_cache;

/* Internal slab management cache, for when slab management is off-slab. */
static kmem_cache_t	*cache_slabp = NULL;

/* Max number of objs-per-slab for caches which use bufctl's.
 * Needed to avoid a possible looping condition in kmem_cache_grow().
 */
static unsigned long bufctl_limit = 0;
/* Initialisation - setup the `cache' cache. */
long __init kmem_cache_init(long start, long end)
{
	size_t size, i;

#define	kmem_slab_offset(x)  ((unsigned long)&((kmem_slab_t *)0)->x)
#define	kmem_slab_diff(a,b)  (kmem_slab_offset(a) - kmem_slab_offset(b))
#define	kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
#define	kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
	/* Sanity checks... */
	if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
	    kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
	    ((kmem_cache_offset(c_lastp) -
	      ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
	     kmem_slab_offset(s_prevp)) ||
	    kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
		/* Offsets to the magic are incorrect, either the structures have
		 * been incorrectly changed, or adjustments are needed for your
		 * architecture.
		 */
		panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
		/* NOTREACHED */
	}
#undef	kmem_cache_offset
#undef	kmem_cache_diff
#undef	kmem_slab_offset
#undef	kmem_slab_diff
	init_MUTEX(&cache_chain_sem);

	size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
	size += (L1_CACHE_BYTES-1);
	size &= ~(L1_CACHE_BYTES-1);
	cache_cache.c_offset = size-sizeof(kmem_bufctl_t);

	i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
	cache_cache.c_num = i / size;	/* num of objs per slab */

	/* Cache colouring. */
	cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
	cache_cache.c_colour_next = cache_cache.c_colour;

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory.
	 */
	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
		slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
	return start;
}
/* Initialisation - setup remaining internal and general caches.
 * Called after the gfp() functions have been enabled, and before smp_init().
 */
void __init kmem_cache_sizes_init(void)
{
	unsigned int	found = 0;

	cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (cache_slabp) {
		char **names = cache_sizes_name;
		cache_sizes_t *sizes = cache_sizes;
		do {
			/* For performance, all the general caches are L1 aligned.
			 * This should be particularly beneficial on SMP boxes, as it
			 * eliminates "false sharing".
			 * Note for systems short on memory removing the alignment will
			 * allow tighter packing of the smaller caches. */
			if (!(sizes->cs_cachep =
			      kmem_cache_create(*names++, sizes->cs_size,
						0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
				goto panic_time;
			if (!found) {
				/* Inc off-slab bufctl limit until the ceiling is hit. */
				if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
					found++;
				else
					bufctl_limit =
						(sizes->cs_size/sizeof(kmem_bufctl_t));
			}
			sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
			sizes++;
		} while (sizes->cs_size);
#if	SLAB_SELFTEST
		kmem_self_test();
#endif	/* SLAB_SELFTEST */
		return;
	}
panic_time:
	panic("kmem_cache_sizes_init: Error creating caches");
}
/* Interface to system's page allocator.  Dma pts to non-zero if all
 * of memory is DMAable.  No need to hold the cache-lock.
 */
static inline void *
kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
{
	void	*addr;

	*dma = flags & SLAB_DMA;
	addr = (void*) __get_free_pages(flags, cachep->c_gfporder);
	/* Assume that now we have the pages no one else can legally
	 * mess with the 'struct page's.
	 * However vm_scan() might try to test the structure to see if
	 * it is a named-page or buffer-page.  The members it tests are
	 * of no interest here.....
	 */
	if (!*dma && addr) {
		/* Need to check if can dma. */
		struct page *page = mem_map + MAP_NR(addr);
		*dma = 1<<cachep->c_gfporder;
		while ((*dma)--) {
			if (!PageDMA(page)) {
				*dma = 0;
				break;
			}
			page++;
		}
	}
	return addr;
}
/* Interface to system's page release. */
static inline void
kmem_freepages(kmem_cache_t *cachep, void *addr)
{
	unsigned long i = (1<<cachep->c_gfporder);
	struct page *page = &mem_map[MAP_NR(addr)];

	/* free_pages() does not clear the type bit - we do that.
	 * The pages have been unlinked from their cache-slab,
	 * but their 'struct page's might be accessed in
	 * vm_scan().  Shouldn't be a worry.
	 */
	while (i--) {
		PageClearSlab(page);
		page++;
	}
	free_pages((unsigned long)addr, cachep->c_gfporder);
}
#if	SLAB_DEBUG_SUPPORT
static inline void
kmem_poison_obj(kmem_cache_t *cachep, void *addr)
{
	memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
	*(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISON_END;
}

static inline int
kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
{
	void *end;

	end = memchr(addr, SLAB_POISON_END, cachep->c_org_size);
	if (end != (addr+cachep->c_org_size-1))
		return 1;
	return 0;
}
#endif	/* SLAB_DEBUG_SUPPORT */
/* Three slab chain funcs - all called with ints disabled and the appropriate
 * cache-lock held.
 */
static inline void
kmem_slab_unlink(kmem_slab_t *slabp)
{
	kmem_slab_t	*prevp = slabp->s_prevp;
	kmem_slab_t	*nextp = slabp->s_nextp;
	prevp->s_nextp = nextp;
	nextp->s_prevp = prevp;
}

static inline void
kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
	kmem_slab_t	*lastp = cachep->c_lastp;
	slabp->s_nextp = kmem_slab_end(cachep);
	slabp->s_prevp = lastp;
	cachep->c_lastp = slabp;
	lastp->s_nextp = slabp;
}

static inline void
kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
	kmem_slab_t	*nextp = cachep->c_freep;
	kmem_slab_t	*prevp = nextp->s_prevp;
	slabp->s_nextp = nextp;
	slabp->s_prevp = prevp;
	nextp->s_prevp = slabp;
	slabp->s_prevp->s_nextp = slabp;
}
/* Destroy all the objs in a slab, and release the mem back to the system.
 * Before calling the slab must have been unlinked from the cache.
 * The cache-lock is not held/needed.
 */
static void
kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
	if (cachep->c_dtor
#if	SLAB_DEBUG_SUPPORT
	    || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
#endif	/*SLAB_DEBUG_SUPPORT*/
	) {
		/* Doesn't use the bufctl ptrs to find objs. */
		unsigned long num = cachep->c_num;
		void *objp = slabp->s_mem;
		do {
#if	SLAB_DEBUG_SUPPORT
			if (cachep->c_flags & SLAB_RED_ZONE) {
				if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
					printk(KERN_ERR "kmem_slab_destroy: "
					       "Bad front redzone - %s\n",
					       cachep->c_name);
				objp += BYTES_PER_WORD;
				if (*((unsigned long*)(objp+cachep->c_org_size)) !=
				    SLAB_RED_MAGIC1)
					printk(KERN_ERR "kmem_slab_destroy: "
					       "Bad rear redzone - %s\n",
					       cachep->c_name);
			}
			if (cachep->c_dtor)
#endif	/*SLAB_DEBUG_SUPPORT*/
				(cachep->c_dtor)(objp, cachep, 0);
#if	SLAB_DEBUG_SUPPORT
			else if (cachep->c_flags & SLAB_POISON) {
				if (kmem_check_poison_obj(cachep, objp))
					printk(KERN_ERR "kmem_slab_destroy: "
					       "Bad poison - %s\n", cachep->c_name);
			}
			if (cachep->c_flags & SLAB_RED_ZONE)
				objp -= BYTES_PER_WORD;
#endif	/* SLAB_DEBUG_SUPPORT */
			objp += cachep->c_offset;
			if (!slabp->s_index)
				objp += sizeof(kmem_bufctl_t);
		} while (--num);
	}

	slabp->s_magic = SLAB_MAGIC_DESTROYED;
	if (slabp->s_index)
		kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
	kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
	if (SLAB_OFF_SLAB(cachep->c_flags))
		kmem_cache_free(cache_slabp, slabp);
}
/* Calculate the num of objs, wastage, and bytes left over for a given slab size. */
static unsigned long
kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
		     unsigned long flags, size_t *left_over, unsigned long *num)
{
	size_t wastage = PAGE_SIZE<<gfporder;

	if (SLAB_OFF_SLAB(flags))
		gfporder = 0;
	else
		gfporder = slab_align_size;
	wastage -= gfporder;
	*num = wastage / size;
	wastage -= (*num * size);
	*left_over = wastage;

	return (wastage + gfporder + (extra * *num));
}
/*
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within a int, but can be interrupted.
 * NOTE: The 'name' is assumed to be memory that is _not_ going to disappear.
 */
kmem_cache_t *
kmem_cache_create(const char *name, size_t size, size_t offset,
	unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
	void (*dtor)(void*, kmem_cache_t *, unsigned long))
{
	const char	*func_nm = KERN_ERR "kmem_create: ";
	kmem_cache_t	*searchp;
	kmem_cache_t	*cachep = NULL;
	size_t		 extra;
	size_t		 left_over;
	size_t		 align;
#if	SLAB_MGMT_CHECKS
	/* Sanity checks... */
	if (!name) {
		printk("%sNULL ptr\n", func_nm);
		goto opps;
	}
	if (in_interrupt()) {
		printk("%sCalled during int - %s\n", func_nm, name);
		goto opps;
	}

	if (size < BYTES_PER_WORD) {
		printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
		size = BYTES_PER_WORD;
	}

	if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
		printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
		goto opps;
	}

	if (dtor && !ctor) {
		/* Decon, but no con - doesn't make sense */
		printk("%sDecon but no con - %s\n", func_nm, name);
		goto opps;
	}

	if (offset < 0 || offset > size) {
		printk("%sOffset weird %d - %s\n", func_nm, (int) offset, name);
		offset = 0;
	}

#if	SLAB_DEBUG_SUPPORT
	if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
		/* No constructor, but initial state check requested */
		printk("%sNo con, but init state check requested - %s\n", func_nm, name);
		flags &= ~SLAB_DEBUG_INITIAL;
	}

	if ((flags & SLAB_POISON) && ctor) {
		/* request for poisoning, but we can't do that with a constructor */
		printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
		flags &= ~SLAB_POISON;
	}

	if ((flags & SLAB_HIGH_PACK) && ctor) {
		printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
		flags &= ~SLAB_HIGH_PACK;
	}
	if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
		printk("%sHigh pack requested, but with poisoning/red-zoning - %s\n",
		       func_nm, name);
		flags &= ~SLAB_HIGH_PACK;
	}
#endif	/* SLAB_DEBUG_SUPPORT */
#endif	/* SLAB_MGMT_CHECKS */
	/* Always checks flags, a caller might be expecting debug
	 * support which isn't available.
	 */
	if (flags & ~SLAB_C_MASK) {
		printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
		flags &= SLAB_C_MASK;
	}

	/* Get cache's description obj. */
	cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
	if (!cachep)
		goto opps;
	memset(cachep, 0, sizeof(kmem_cache_t));

	/* Check that size is in terms of words.  This is needed to avoid
	 * unaligned accesses for some archs when redzoning is used, and makes
	 * sure any on-slab bufctl's are also correctly aligned.
	 */
	if (size & (BYTES_PER_WORD-1)) {
		size += (BYTES_PER_WORD-1);
		size &= ~(BYTES_PER_WORD-1);
		printk("%sForcing size word alignment - %s\n", func_nm, name);
	}

	cachep->c_org_size = size;
#if	SLAB_DEBUG_SUPPORT
	if (flags & SLAB_RED_ZONE) {
		/* There is no point trying to honour cache alignment when redzoning. */
		flags &= ~SLAB_HWCACHE_ALIGN;
		size += 2*BYTES_PER_WORD;	/* words for redzone */
	}
#endif	/* SLAB_DEBUG_SUPPORT */

	align = BYTES_PER_WORD;
	if (flags & SLAB_HWCACHE_ALIGN)
		align = L1_CACHE_BYTES;
	/* Determine if the slab management and/or bufctls are 'on' or 'off' slab. */
	extra = sizeof(kmem_bufctl_t);
	if (size < (PAGE_SIZE>>3)) {
		/* Size is small(ish).  Use packing where bufctl size per
		 * obj is low, and slab management is on-slab.
		 */
		if ((flags & SLAB_HIGH_PACK)) {
			/* Special high packing for small objects
			 * (mainly for vm_mapping structs, but
			 * others can use it).
			 */
			if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
			    size == L1_CACHE_BYTES) {
				/* The bufctl is stored with the object. */
				extra = 0;
			} else
				flags &= ~SLAB_HIGH_PACK;
		}
	} else {
		/* Size is large, assume best to place the slab management obj
		 * off-slab (should allow better packing of objs).
		 */
		flags |= SLAB_CFLGS_OFF_SLAB;
		if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
		    || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
			/* To avoid waste the bufctls are off-slab... */
			flags |= SLAB_CFLGS_BUFCTL;
		} /* else slab management is off-slab, but freelist pointers are on. */
	}
	if (flags & SLAB_HWCACHE_ALIGN) {
		/* Need to adjust size so that objs are cache aligned. */
		if (size > (L1_CACHE_BYTES/2)) {
			size_t words = size % L1_CACHE_BYTES;
			if (words)
				size += (L1_CACHE_BYTES-words);
		} else {
			/* Small obj size, can get at least two per cache line. */
			int num_per_line = L1_CACHE_BYTES/size;
			left_over = L1_CACHE_BYTES - (num_per_line*size);

			/* Need to adjust size so objs cache align. */
			if (left_over%num_per_line) {
				/* Odd num of objs per line - fixup. */
				num_per_line--;
				left_over += size;
			}
			size += (left_over/num_per_line);
		}
	} else if (!(size%L1_CACHE_BYTES)) {
		/* Size happens to cache align... */
		flags |= SLAB_HWCACHE_ALIGN;
		align = L1_CACHE_BYTES;
	}
	/* Cal size (in pages) of slabs, and the num of objs per slab.
	 * This could be made much more intelligent.  For now, try to avoid
	 * using high page-orders for slabs.  When the gfp() funcs are more
	 * friendly towards high-order requests, this should be changed.
	 */
	do {
		size_t		wastage;
		unsigned int	break_flag = 0;
cal_wastage:
		wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
					       flags, &left_over, &cachep->c_num);
		if (!cachep->c_num)
			goto next;
		if (break_flag)
			break;
		if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
			/* Oops, this num of objs will cause problems. */
			cachep->c_gfporder--;
			break_flag++;
			goto cal_wastage;
		}
		if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
			break;

		/* Large num of objs is good, but v. large slabs are currently
		 * bad for the gfp()s.
		 */
		if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
			if (cachep->c_gfporder < slab_break_gfp_order)
				goto next;
		}

		/* Stop caches with small objs having a large num of pages. */
		if (left_over <= slab_align_size)
			break;
		if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
			break;		/* Acceptable internal fragmentation. */
next:
		cachep->c_gfporder++;
	} while (1);
	/* If the slab has been placed off-slab, and we have enough space then
	 * move it on-slab.  This is at the expense of any extra colouring.
	 */
	if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
	    left_over >= slab_align_size) {
		flags &= ~SLAB_CFLGS_OFF_SLAB;
		left_over -= slab_align_size;
	}

	/* Offset must be a multiple of the alignment. */
	offset += (align-1);
	offset &= ~(align-1);
	/* Mess around with the offset alignment. */
	if (!left_over) {
		offset = 0;
	} else if (left_over < offset) {
		offset = align;
		if (flags & SLAB_HWCACHE_ALIGN) {
			if (left_over < offset)
				offset = 0;
		} else {
			/* Offset is BYTES_PER_WORD, and left_over is at
			 * least BYTES_PER_WORD.
			 */
			if (left_over >= (BYTES_PER_WORD*2)) {
				offset >>= 1;
				if (left_over >= (BYTES_PER_WORD*4))
					offset >>= 1;
			}
		}
	} else if (!offset) {
		/* No offset requested, but space enough - give one. */
		offset = left_over/align;
		if (flags & SLAB_HWCACHE_ALIGN) {
			/* A large number of colours - use a larger alignment. */
		}
	}

#if	0
	printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
#endif
	if ((cachep->c_align = (unsigned long) offset))
		cachep->c_colour = (left_over/offset);
	cachep->c_colour_next = cachep->c_colour;

	/* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
	if (!SLAB_BUFCTL(flags))
		size -= sizeof(kmem_bufctl_t);
	else
		cachep->c_index_cachep =
			kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
	cachep->c_offset = (unsigned long) size;
	cachep->c_freep = kmem_slab_end(cachep);
	cachep->c_firstp = kmem_slab_end(cachep);
	cachep->c_lastp = kmem_slab_end(cachep);
	cachep->c_flags = flags;
	cachep->c_ctor = ctor;
	cachep->c_dtor = dtor;
	cachep->c_magic = SLAB_C_MAGIC;
	cachep->c_name = name;		/* Simply point to the name. */
	spin_lock_init(&cachep->c_spinlock);
	/* Need the semaphore to access the chain. */
	down(&cache_chain_sem);
	searchp = &cache_cache;
	do {
		/* The name field is constant - no lock needed. */
		if (!strcmp(searchp->c_name, name)) {
			printk("%sDup name - %s\n", func_nm, name);
			break;
		}
		searchp = searchp->c_nextp;
	} while (searchp != &cache_cache);

	/* There is no reason to lock our new cache before we
	 * link it in - no one knows about it yet...
	 */
	cachep->c_nextp = cache_cache.c_nextp;
	cache_cache.c_nextp = cachep;
	up(&cache_chain_sem);
opps:
	return cachep;
}
/* Shrink a cache.  Releases as many slabs as possible for a cache.
 * It is expected this function will be called by a module when it is
 * unloaded.  The cache is _not_ removed, this creates too many problems and
 * the cache-structure does not take up much room.  A module should keep its
 * cache pointer(s) in unloaded memory, so when reloaded it knows the cache
 * is available.  To help debugging, a zero exit status indicates all slabs
 * were released.
 */
int
kmem_cache_shrink(kmem_cache_t *cachep)
{
	kmem_cache_t	*searchp;
	kmem_slab_t	*slabp;
	int		 ret;

	if (!cachep) {
		printk(KERN_ERR "kmem_shrink: NULL ptr\n");
		return 2;
	}
	if (in_interrupt()) {
		printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
		return 2;
	}

	/* Find the cache in the chain of caches. */
	down(&cache_chain_sem);		/* Semaphore is needed. */
	searchp = &cache_cache;
	for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
		if (searchp->c_nextp != cachep)
			continue;

		/* Accessing clock_searchp is safe - we hold the mutex. */
		if (cachep == clock_searchp)
			clock_searchp = cachep->c_nextp;
		goto found;
	}
	up(&cache_chain_sem);
	printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
	return 2;
found:
	/* Release the semaphore before getting the cache-lock.  This could
	 * mean multiple engines are shrinking the cache, but so what.
	 */
	up(&cache_chain_sem);
	spin_lock_irq(&cachep->c_spinlock);

	/* If the cache is growing, stop shrinking. */
	while (!cachep->c_growing) {
		slabp = cachep->c_lastp;
		if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
			break;
		kmem_slab_unlink(slabp);
		spin_unlock_irq(&cachep->c_spinlock);
		kmem_slab_destroy(cachep, slabp);
		spin_lock_irq(&cachep->c_spinlock);
	}
	ret = 1;
	if (cachep->c_lastp == kmem_slab_end(cachep))
		ret--;		/* Cache is empty. */
	spin_unlock_irq(&cachep->c_spinlock);
	return ret;
}
/* Get the memory for a slab management obj. */
static inline kmem_slab_t *
kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
{
	kmem_slab_t	*slabp;

	if (SLAB_OFF_SLAB(cachep->c_flags)) {
		/* Slab management obj is off-slab. */
		slabp = kmem_cache_alloc(cache_slabp, local_flags);
	} else {
		/* Slab management at end of slab memory, placed so that
		 * the position is 'coloured'.
		 */
		void *end;
		end = objp + (cachep->c_num * cachep->c_offset);
		if (!SLAB_BUFCTL(cachep->c_flags))
			end += (cachep->c_num * sizeof(kmem_bufctl_t));
		slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
	}

	if (slabp) {
		slabp->s_inuse = 0;
		slabp->s_dma = 0;
		slabp->s_index = NULL;
	}

	return slabp;
}
static inline void
kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
		     unsigned long ctor_flags)
{
	kmem_bufctl_t	**bufpp = &slabp->s_freep;
	unsigned long	 num = cachep->c_num-1;

	do {
#if	SLAB_DEBUG_SUPPORT
		if (cachep->c_flags & SLAB_RED_ZONE) {
			*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
			objp += BYTES_PER_WORD;
			*((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
		}
#endif	/* SLAB_DEBUG_SUPPORT */

		/* Constructors are not allowed to allocate memory from the same cache
		 * which they are a constructor for.  Otherwise, deadlock.
		 * They must also be threaded.
		 */
		if (cachep->c_ctor)
			cachep->c_ctor(objp, cachep, ctor_flags);
#if	SLAB_DEBUG_SUPPORT
		else if (cachep->c_flags & SLAB_POISON) {
			/* need to poison the objs */
			kmem_poison_obj(cachep, objp);
		}

		if (cachep->c_flags & SLAB_RED_ZONE) {
			if (*((unsigned long*)(objp+cachep->c_org_size)) !=
			    SLAB_RED_MAGIC1) {
				*((unsigned long*)(objp+cachep->c_org_size)) =
					SLAB_RED_MAGIC1;
				printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
				       "after constructor - %s\n", cachep->c_name);
			}
			objp -= BYTES_PER_WORD;
			if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
				*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
				printk(KERN_ERR "kmem_init_obj: Bad front redzone "
				       "after constructor - %s\n", cachep->c_name);
			}
		}
#endif	/* SLAB_DEBUG_SUPPORT */

		objp += cachep->c_offset;
		if (!slabp->s_index) {
			*bufpp = objp;
			objp += sizeof(kmem_bufctl_t);
		} else
			*bufpp = &slabp->s_index[num];
		bufpp = &(*bufpp)->buf_nextp;
	} while (num--);

	*bufpp = NULL;
}
/* Grow (by 1) the number of slabs within a cache.  This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
static int
kmem_cache_grow(kmem_cache_t * cachep, int flags)
{
	kmem_slab_t	*slabp;
	struct page	*page;
	void		*objp;
	size_t		 offset;
	unsigned int	 dma, local_flags;
	unsigned long	 ctor_flags;
	unsigned long	 save_flags;

	/* Be lazy and only check for valid flags here,
	 * keeping it out of the critical path in kmem_cache_alloc().
	 */
	if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
		printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
		       flags, cachep->c_name);
		flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
	}

	if (flags & SLAB_NO_GROW)
		return 0;

	/* The test for missing atomic flag is performed here, rather than
	 * the more obvious place, simply to reduce the critical path length
	 * in kmem_cache_alloc().  If a caller is slightly mis-behaving they
	 * will eventually be caught here (where it matters).
	 */
	if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
		printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
		       cachep->c_name);
		flags &= ~SLAB_LEVEL_MASK;
		flags |= SLAB_ATOMIC;
	}
	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
	local_flags = (flags & SLAB_LEVEL_MASK);
	if (local_flags == SLAB_ATOMIC) {
		/* Not allowed to sleep.  Need to tell a constructor about
		 * this - it might need to know...
		 */
		ctor_flags |= SLAB_CTOR_ATOMIC;
	}

	/* About to mess with non-constant members - lock. */
	spin_lock_irqsave(&cachep->c_spinlock, save_flags);

	/* Get colour for the slab, and cal the next value. */
	if (!(offset = cachep->c_colour_next--))
		cachep->c_colour_next = cachep->c_colour;
	offset *= cachep->c_align;
	cachep->c_dflags = SLAB_CFLGS_GROWN;

	cachep->c_growing++;
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);

	/* A series of memory allocations for a new slab.
	 * Neither the cache-chain semaphore, or cache-lock, are
	 * held, but the incrementing c_growing prevents this
	 * cache from being reaped or shrunk.
	 * Note: The cache could be selected in for reaping in
	 * kmem_cache_reap(), but when the final test is made the
	 * growing value will be seen.
	 */

	/* Get mem for the objs. */
	if (!(objp = kmem_getpages(cachep, flags, &dma)))
		goto failed;

	/* Get slab management. */
	if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
		goto opps1;

	if (SLAB_BUFCTL(cachep->c_flags)) {
		slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
		if (!slabp->s_index)
			goto opps2;
	}

	/* Nasty!!!!!! I hope this is OK. */
	dma = 1 << cachep->c_gfporder;
	page = &mem_map[MAP_NR(objp)];
	do {
		SLAB_SET_PAGE_CACHE(page, cachep);
		SLAB_SET_PAGE_SLAB(page, slabp);
		PageSetSlab(page);
		page++;
	} while (--dma);

	slabp->s_offset = offset;	/* It will fit... */
	objp += offset;			/* Address of first object. */
	slabp->s_mem = objp;

	/* For on-slab bufctls, c_offset is the distance between the start of
	 * an obj and its related bufctl.  For off-slab bufctls, c_offset is
	 * the distance between objs in the slab.
	 */
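	/* Illustration only (not from the original source): with on-slab
	 * bufctls the bufctl for an obj sits right behind it, and the free
	 * path finds it with
	 *
	 *	bufp = (kmem_bufctl_t *)(objp + cachep->c_offset);
	 *
	 * while with off-slab bufctls (SLAB_CFLGS_BUFCTL) the i-th obj and
	 * its bufctl are related through the slab's index array:
	 *
	 *	objp = slabp->s_mem + i * cachep->c_offset;
	 *	bufp = &slabp->s_index[i];
	 *
	 * which is exactly the mapping __kmem_cache_alloc() and
	 * __kmem_cache_free() use below.
	 */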
	kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);

	spin_lock_irq(&cachep->c_spinlock);

	/* Make slab active. */
	slabp->s_magic = SLAB_MAGIC_ALLOC;
	kmem_slab_link_end(cachep, slabp);
	if (cachep->c_freep == kmem_slab_end(cachep))
		cachep->c_freep = slabp;
	SLAB_STATS_INC_GROWN(cachep);
	cachep->c_failures = 0;
	cachep->c_growing--;

	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
	return 1;
opps2:
	if (SLAB_OFF_SLAB(cachep->c_flags))
		kmem_cache_free(cache_slabp, slabp);
opps1:
	kmem_freepages(cachep, objp);
failed:
	spin_lock_irq(&cachep->c_spinlock);
	cachep->c_growing--;
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
	return 0;
}
static void
kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
{
	if (cachep)
		SLAB_STATS_INC_ERR(cachep);	/* this is atomic */
	printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
	       str, cachep ? cachep->c_name : "unknown");
}

static void
kmem_report_free_err(const char *str, const void *objp, kmem_cache_t * cachep)
{
	if (cachep)
		SLAB_STATS_INC_ERR(cachep);
	printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
	       str, objp, cachep ? cachep->c_name : "unknown");
}
/* Search for a slab whose objs are suitable for DMA.
 * Note: since testing the first free slab (in __kmem_cache_alloc()),
 * ints must not have been enabled, or the cache-lock released!
 */
static inline kmem_slab_t *
kmem_cache_search_dma(kmem_cache_t * cachep)
{
	kmem_slab_t	*slabp = cachep->c_freep->s_nextp;

	for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
		if (!(slabp->s_dma))
			continue;
		kmem_slab_unlink(slabp);
		kmem_slab_link_free(cachep, slabp);
		cachep->c_freep = slabp;
		break;
	}
	return slabp;
}
#if	SLAB_DEBUG_SUPPORT
/* Perform extra freeing checks.  Currently, this check is only for caches
 * that use bufctl structures within the slab.  Those which use bufctl's
 * from the internal cache have a reasonable check when the address is
 * searched for.  Called with the cache-lock held.
 * Returns non-zero when the checks pass, zero when a double free is detected.
 */
static int
kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
		       kmem_bufctl_t *bufp, void * objp)
{
	if (SLAB_BUFCTL(cachep->c_flags))
		return 1;

	/* Check slab's freelist to see if this obj is there. */
	for (; search_bufp; search_bufp = search_bufp->buf_nextp) {
		if (search_bufp != bufp)
			continue;
		return 0;	/* already on the freelist - double free */
	}
	return 1;
}
#endif	/* SLAB_DEBUG_SUPPORT */
/* Called with cache lock held. */
static inline void
kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
	if (slabp->s_nextp->s_inuse) {
		/* Not at correct position. */
		if (cachep->c_freep == slabp)
			cachep->c_freep = slabp->s_nextp;
		kmem_slab_unlink(slabp);
		kmem_slab_link_end(cachep, slabp);
	}
}
/* Called with cache lock held. */
static inline void
kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
	if (slabp->s_nextp->s_inuse == cachep->c_num) {
		kmem_slab_unlink(slabp);
		kmem_slab_link_free(cachep, slabp);
	}
	cachep->c_freep = slabp;
}
/* Returns a ptr to an obj in the given cache. */
static inline void *
__kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
	kmem_slab_t	*slabp;
	kmem_bufctl_t	*bufp;
	void		*objp;
	unsigned long	 save_flags;

	/* Sanity check. */
	if (!cachep)
		goto nul_ptr;
	spin_lock_irqsave(&cachep->c_spinlock, save_flags);
try_again:
	/* Get slab alloc is to come from. */
	slabp = cachep->c_freep;

	/* Magic is a sanity check _and_ says if we need a new slab. */
	if (slabp->s_magic != SLAB_MAGIC_ALLOC)
		goto alloc_new_slab;
	/* DMA requests are 'rare' - keep out of the critical path. */
	if (flags & SLAB_DMA)
		goto search_dma;
try_again_dma:
	SLAB_STATS_INC_ALLOCED(cachep);
	SLAB_STATS_INC_ACTIVE(cachep);
	SLAB_STATS_SET_HIGH(cachep);
	slabp->s_inuse++;
	bufp = slabp->s_freep;
	slabp->s_freep = bufp->buf_nextp;
	if (slabp->s_freep) {
ret_obj:
		if (!slabp->s_index) {
			bufp->buf_slabp = slabp;
			objp = ((void*)bufp) - cachep->c_offset;
finished:
			/* The lock is not needed by the red-zone or poison ops, and the
			 * obj has been removed from the slab.  Should be safe to drop
			 * the lock here.
			 */
			spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
#if	SLAB_DEBUG_SUPPORT
			if (cachep->c_flags & SLAB_RED_ZONE)
				goto red_zone;
ret_red:
			if ((cachep->c_flags & SLAB_POISON) && kmem_check_poison_obj(cachep, objp))
				kmem_report_alloc_err("Bad poison", cachep);
#endif	/* SLAB_DEBUG_SUPPORT */
			return objp;
		}
		/* Update index ptr. */
		objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
		bufp->buf_objp = objp;
		goto finished;
	}
	cachep->c_freep = slabp->s_nextp;
	goto ret_obj;

#if	SLAB_DEBUG_SUPPORT
red_zone:
	/* Set alloc red-zone, and check old one. */
	if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
		kmem_report_alloc_err("Bad front redzone", cachep);
	objp += BYTES_PER_WORD;
	if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
		kmem_report_alloc_err("Bad rear redzone", cachep);
	goto ret_red;
#endif	/* SLAB_DEBUG_SUPPORT */

search_dma:
	if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
		goto try_again_dma;
alloc_new_slab:
	/* Either out of slabs, or magic number corruption. */
	if (slabp == kmem_slab_end(cachep)) {
		/* Need a new slab.  Release the lock before calling kmem_cache_grow().
		 * This allows objs to be released back into the cache while growing.
		 */
		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
		if (kmem_cache_grow(cachep, flags)) {
			/* Someone may have stolen our objs.  Doesn't matter, we'll
			 * just come back here again.
			 */
			spin_lock_irq(&cachep->c_spinlock);
			goto try_again;
		}
		/* Couldn't grow, but some objs may have been freed. */
		spin_lock_irq(&cachep->c_spinlock);
		if (cachep->c_freep != kmem_slab_end(cachep)) {
			if ((flags & SLAB_ATOMIC) == 0)
				goto try_again;
		}
	} else {
		/* Very serious error - maybe panic() here? */
		kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
	}
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
err_exit:
	return NULL;
nul_ptr:
	kmem_report_alloc_err("NULL ptr", NULL);
	goto err_exit;
}
/* Release an obj back to its cache.  If the obj has a constructed state,
 * it should be in this state _before_ it is released.
 */
static inline void
__kmem_cache_free(kmem_cache_t *cachep, const void *objp)
{
	kmem_slab_t	*slabp;
	kmem_bufctl_t	*bufp;
	unsigned long	 save_flags;

	/* Basic sanity checks. */
	if (!cachep || !objp)
		goto null_addr;

#if	SLAB_DEBUG_SUPPORT
	/* A verify func is called without the cache-lock held. */
	if (cachep->c_flags & SLAB_DEBUG_INITIAL)
		goto init_state_check;
finished_initial:

	if (cachep->c_flags & SLAB_RED_ZONE)
		goto red_zone;
return_red:
#endif	/* SLAB_DEBUG_SUPPORT */

	spin_lock_irqsave(&cachep->c_spinlock, save_flags);

	if (SLAB_BUFCTL(cachep->c_flags))
		goto bufctl;
	bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);

	/* Get slab for the object. */
	/* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
	 * Is this worth while? XXX
	 */
	if (cachep->c_flags & SLAB_HIGH_PACK)
		slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
	else
		slabp = bufp->buf_slabp;

check_magic:
	if (slabp->s_magic != SLAB_MAGIC_ALLOC)		/* Sanity check. */
		goto bad_slab;

#if	SLAB_DEBUG_SUPPORT
	if (cachep->c_flags & SLAB_DEBUG_FREE)
		goto extra_checks;
passed_extra:
#endif	/* SLAB_DEBUG_SUPPORT */

	if (slabp->s_inuse) {		/* Sanity check. */
		SLAB_STATS_DEC_ACTIVE(cachep);
		slabp->s_inuse--;
		bufp->buf_nextp = slabp->s_freep;
		slabp->s_freep = bufp;
		if (bufp->buf_nextp) {
			if (slabp->s_inuse) {
				/* (hopefully) The most common case. */
finished:
#if	SLAB_DEBUG_SUPPORT
				if (cachep->c_flags & SLAB_POISON) {
					if (cachep->c_flags & SLAB_RED_ZONE)
						objp += BYTES_PER_WORD;
					kmem_poison_obj(cachep, objp);
				}
#endif	/* SLAB_DEBUG_SUPPORT */
				spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
				return;
			}
			kmem_cache_full_free(cachep, slabp);
			goto finished;
		}
		kmem_cache_one_free(cachep, slabp);
		goto finished;
	}

	/* Don't add to freelist. */
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
	kmem_report_free_err("free with no active objs", objp, cachep);
	return;
bufctl:
	/* No 'extra' checks are performed for objs stored this way, finding
	 * the obj is check enough.
	 */
	slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
	bufp = &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
	if (bufp->buf_objp == objp)
		goto check_magic;
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
	kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
	return;
#if	SLAB_DEBUG_SUPPORT
init_state_check:
	/* Need to call the slab's constructor so the
	 * caller can perform a verify of its state (debugging).
	 */
	cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
	goto finished_initial;
extra_checks:
	if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
		kmem_report_free_err("Double free detected during checks", objp, cachep);
		return;
	}
	goto passed_extra;
red_zone:
	/* We do not hold the cache-lock while checking the red-zone.
	 */
	objp -= BYTES_PER_WORD;
	if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
		/* Either write before start of obj, or a double free. */
		kmem_report_free_err("Bad front redzone", objp, cachep);
	}
	if (xchg((unsigned long *)(objp+cachep->c_org_size+BYTES_PER_WORD), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
		/* Either write past end of obj, or a double free. */
		kmem_report_free_err("Bad rear redzone", objp, cachep);
	}
	goto return_red;
#endif	/* SLAB_DEBUG_SUPPORT */

bad_slab:
	/* Slab doesn't contain the correct magic num. */
	if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
		/* Magic num says this is a destroyed slab. */
		kmem_report_free_err("free from inactive slab", objp, cachep);
	} else
		kmem_report_free_err("Bad obj addr", objp, cachep);
	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);

#if	1
/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
	*(int *) 0 = 0;
#endif

	return;
null_addr:
	kmem_report_free_err("NULL ptr", objp, cachep);
	return;
}
void *
kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
	return __kmem_cache_alloc(cachep, flags);
}

void
kmem_cache_free(kmem_cache_t *cachep, void *objp)
{
	__kmem_cache_free(cachep, objp);
}
void *
kmalloc(size_t size, int flags)
{
	cache_sizes_t	*csizep = cache_sizes;

	for (; csizep->cs_size; csizep++) {
		if (size > csizep->cs_size)
			continue;
		return __kmem_cache_alloc(csizep->cs_cachep, flags);
	}
	printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
	return NULL;
}
void
kfree(const void *objp)
{
	struct page	*page;
	int		 nr;

	if (!objp)
		goto bad_ptr;
	nr = MAP_NR(objp);
	if (nr >= max_mapnr)
		goto bad_ptr;

	/* Assume we own the page structure - hence no locking.
	 * If someone is misbehaving (for example, calling us with a bad
	 * address), then access to the page structure can race with the
	 * kmem_slab_destroy() code.  Need to add a spin_lock to each page
	 * structure, which would be useful in threading the gfp() functions....
	 */
	page = &mem_map[nr];
	if (PageSlab(page)) {
		kmem_cache_t	*cachep;

		/* Here, we again assume the obj address is good.
		 * If it isn't, and happens to map onto another
		 * general cache page which has no active objs, then
		 * we race.
		 */
		cachep = SLAB_GET_PAGE_CACHE(page);
		if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
			__kmem_cache_free(cachep, objp);
			return;
		}
	}
bad_ptr:
	printk(KERN_ERR "kfree: Bad obj %p\n", objp);

#if	1
/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
	*(int *) 0 = 0;
#endif

	return;
}
void
kfree_s(const void *objp, size_t size)
{
	struct page	*page;
	int		 nr;

	if (!objp)
		goto bad_ptr;
	nr = MAP_NR(objp);
	if (nr >= max_mapnr)
		goto bad_ptr;
	/* See comment in kfree() */
	page = &mem_map[nr];
	if (PageSlab(page)) {
		kmem_cache_t	*cachep;
		/* See comment in kfree() */
		cachep = SLAB_GET_PAGE_CACHE(page);
		if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
			if (size <= cachep->c_org_size) {	/* XXX better check */
				__kmem_cache_free(cachep, objp);
				return;
			}
		}
	}
bad_ptr:
	printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
	return;
}
kmem_cache_t *
kmem_find_general_cachep(size_t size)
{
	cache_sizes_t	*csizep = cache_sizes;

	/* This function could be moved to the header file, and
	 * made inline so consumers can quickly determine what
	 * cache pointer they require.
	 */
	for (; csizep->cs_size; csizep++) {
		if (size > csizep->cs_size)
			continue;
		break;
	}
	return csizep->cs_cachep;
}
/* Called from try_to_free_page().
 * This function _cannot_ be called within a int, but it
 * can be interrupted.
 */
void
kmem_cache_reap(int gfp_mask)
{
	kmem_slab_t	*slabp;
	kmem_cache_t	*searchp;
	kmem_cache_t	*best_cachep;
	unsigned int	 scan;
	unsigned int	 reap_level;

	if (in_interrupt()) {
		printk("kmem_cache_reap() called within int!\n");
		return;
	}

	/* We really need a test semaphore op so we can avoid sleeping when
	 * !wait is true.
	 */
	down(&cache_chain_sem);

	scan = 10;
	reap_level = 0;
	best_cachep = NULL;
	searchp = clock_searchp;
	do {
		unsigned int	full_free;
		unsigned int	dma_flag;

		/* It's safe to test this without holding the cache-lock. */
		if (searchp->c_flags & SLAB_NO_REAP)
			goto next;
		spin_lock_irq(&searchp->c_spinlock);
		if (searchp->c_growing)
			goto next_unlock;
		if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
			searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
			goto next_unlock;
		}
		/* Sanity check for corruption of static values. */
		if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
			spin_unlock_irq(&searchp->c_spinlock);
			printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
			goto next;
		}
		dma_flag = 0;
		full_free = 0;

		/* Count the fully free slabs.  There should not be many,
		 * since we are holding the cache lock.
		 */
		slabp = searchp->c_lastp;
		while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
			slabp = slabp->s_prevp;
			full_free++;
			if (slabp->s_dma)
				dma_flag++;
		}
		spin_unlock_irq(&searchp->c_spinlock);

		if ((gfp_mask & GFP_DMA) && !dma_flag)
			goto next;

		if (full_free) {
			if (full_free >= 10) {
				best_cachep = searchp;
				break;
			}

			/* Try to avoid slabs with constructors and/or
			 * more than one page per slab (as it can be difficult
			 * to get high orders from gfp()).
			 */
			if (full_free >= reap_level) {
				reap_level = full_free;
				best_cachep = searchp;
			}
		}
		goto next;
next_unlock:
		spin_unlock_irq(&searchp->c_spinlock);
next:
		searchp = searchp->c_nextp;
	} while (--scan && searchp != clock_searchp);

	clock_searchp = searchp;
	up(&cache_chain_sem);

	if (!best_cachep) {
		/* couldn't find anything to reap */
		return;
	}

	spin_lock_irq(&best_cachep->c_spinlock);
	while (!best_cachep->c_growing &&
	       !(slabp = best_cachep->c_lastp)->s_inuse &&
	       slabp != kmem_slab_end(best_cachep)) {
		if (gfp_mask & GFP_DMA) {
			do {
				if (slabp->s_dma)
					goto good_dma;
				slabp = slabp->s_prevp;
			} while (!slabp->s_inuse && slabp != kmem_slab_end(best_cachep));

			/* Didn't find a DMA slab (there was a free one -
			 * it must have become active).
			 */
			goto dma_fail;
good_dma:
		}
		if (slabp == best_cachep->c_freep)
			best_cachep->c_freep = slabp->s_nextp;
		kmem_slab_unlink(slabp);
		SLAB_STATS_INC_REAPED(best_cachep);

		/* Safe to drop the lock.  The slab is no longer linked to the
		 * cache.
		 */
		spin_unlock_irq(&best_cachep->c_spinlock);
		kmem_slab_destroy(best_cachep, slabp);
		spin_lock_irq(&best_cachep->c_spinlock);
	}
dma_fail:
	spin_unlock_irq(&best_cachep->c_spinlock);
	return;
}
#if	SLAB_SELFTEST
/* A few v. simple tests */
static void
kmem_self_test(void)
{
	kmem_cache_t	*test_cachep;

	printk(KERN_INFO "kmem_test() - start\n");
	test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISON, NULL, NULL);
	if (test_cachep) {
		char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
		if (objp) {
			/* Write in front and past end, red-zone test. */
			*(objp-1) = 1;
			*(objp+16) = 1;
			kmem_cache_free(test_cachep, objp);

			/* Mess up poisoning. */
			*objp = 10;
			objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
			kmem_cache_free(test_cachep, objp);

			/* Mess up poisoning (again). */
			*objp = 10;
			kmem_cache_shrink(test_cachep);
		}
	}
	printk(KERN_INFO "kmem_test() - finished\n");
}
#endif	/* SLAB_SELFTEST */
#if	defined(CONFIG_PROC_FS)
/* /proc/slabinfo
 * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab
 */
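/* Illustrative only (not from the original file): with SLAB_STATS disabled a
 * line of this output is just "name active-objs total-objs", e.g.
 *
 *	kmem_cache             28     42
 *
 * while a SLAB_STATS build appends the slab counts, pages and the
 * high-mark/allocation/grown/reaped/error counters - see the two sprintf()
 * formats in get_slabinfo() below.
 */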
static int
get_slabinfo(char *buf)
{
	kmem_cache_t	*cachep;
	kmem_slab_t	*slabp;
	unsigned long	 active_objs;
	unsigned long	 save_flags;
	unsigned long	 num_slabs;
	unsigned long	 num_objs;
	int		 len = 0;
#if	SLAB_STATS
	unsigned long	 active_slabs;
#endif	/* SLAB_STATS */

	__save_flags(save_flags);

	/* Output format version, so at least we can change it without _too_
	 * many complaints.
	 */
#if	SLAB_STATS
	len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
#else
	len = sprintf(buf, "slabinfo - version: 1.0\n");
#endif	/* SLAB_STATS */
	down(&cache_chain_sem);
	cachep = &cache_cache;
	do {
#if	SLAB_STATS
		active_slabs = 0;
#endif	/* SLAB_STATS */
		num_slabs = active_objs = 0;
		spin_lock_irq(&cachep->c_spinlock);
		for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
			active_objs += slabp->s_inuse;
			num_slabs++;
#if	SLAB_STATS
			if (slabp->s_inuse)
				active_slabs++;
#endif	/* SLAB_STATS */
		}
		num_objs = cachep->c_num*num_slabs;
#if	SLAB_STATS
		{
		unsigned long errors;
		unsigned long high = cachep->c_high_mark;
		unsigned long grown = cachep->c_grown;
		unsigned long reaped = cachep->c_reaped;
		unsigned long allocs = cachep->c_num_allocations;
		errors = (unsigned long) atomic_read(&cachep->c_errors);
		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
		len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
			       cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
			       (1<<cachep->c_gfporder)*num_slabs,
			       high, allocs, grown, reaped, errors);
		}
#else
		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
		len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
#endif	/* SLAB_STATS */
	} while ((cachep = cachep->c_nextp) != &cache_cache);
	up(&cache_chain_sem);

	return len;
}
#endif	/* CONFIG_PROC_FS */